Properly implement InputPhoto/InputDocument caching

Since uploading a file is done on the TelegramClient, and the
InputFiles are only valid for a short period of time, it only
makes sense to cache the sent media instead (which should not
expire). The problem is the MD5 is only needed when uploading
the file.

The solution is to allow upload_file to check for the wanted
cache and, if available, return an instance of that, so as to
preserve the flexibility of both options (always InputFile,
or the cached InputPhoto/InputDocument) instead of reuploading.
This commit is contained in:
Lonami Exo 2018-01-18 19:36:47 +01:00
parent 7e707dbbd9
commit 0e4611a593
2 changed files with 85 additions and 63 deletions

View File

@ -66,7 +66,8 @@ from .tl.types import (
UpdateNewChannelMessage, UpdateNewMessage, UpdateShortSentMessage, UpdateNewChannelMessage, UpdateNewMessage, UpdateShortSentMessage,
PeerUser, InputPeerUser, InputPeerChat, InputPeerChannel, MessageEmpty, PeerUser, InputPeerUser, InputPeerChat, InputPeerChannel, MessageEmpty,
ChatInvite, ChatInviteAlready, PeerChannel, Photo, InputPeerSelf, ChatInvite, ChatInviteAlready, PeerChannel, Photo, InputPeerSelf,
InputSingleMedia, InputMediaPhoto, InputPhoto, InputFile, InputFileBig InputSingleMedia, InputMediaPhoto, InputPhoto, InputFile, InputFileBig,
InputDocument, InputMediaDocument
) )
from .tl.types.messages import DialogsSlice from .tl.types.messages import DialogsSlice
from .extensions import markdown from .extensions import markdown
@ -875,7 +876,9 @@ class TelegramClient(TelegramBareClient):
allow_cache (:obj:`bool`, optional): allow_cache (:obj:`bool`, optional):
Whether to allow using the cached version stored in the Whether to allow using the cached version stored in the
database or not. Defaults to ``True`` to avoid reuploads. database or not. Defaults to ``True`` to avoid re-uploads.
Must be ``False`` if you wish to use different attributes
or thumb than those that were used when the file was cached.
Kwargs: Kwargs:
If "is_voice_note" in kwargs, despite its value, and the file is If "is_voice_note" in kwargs, despite its value, and the file is
@ -892,8 +895,7 @@ class TelegramClient(TelegramBareClient):
if all(utils.is_image(x) for x in file): if all(utils.is_image(x) for x in file):
return self._send_album( return self._send_album(
entity, file, caption=caption, entity, file, caption=caption,
progress_callback=progress_callback, reply_to=reply_to, progress_callback=progress_callback, reply_to=reply_to
allow_cache=allow_cache
) )
# Not all are images, so send all the files one by one # Not all are images, so send all the files one by one
return [ return [
@ -905,10 +907,20 @@ class TelegramClient(TelegramBareClient):
) for x in file ) for x in file
] ]
as_image = utils.is_image(file) and not force_document
use_cache = InputPhoto if as_image else InputDocument
file_handle = self.upload_file( file_handle = self.upload_file(
file, progress_callback=progress_callback, allow_cache=allow_cache) file, progress_callback=progress_callback,
use_cache=use_cache if allow_cache else None
)
if utils.is_image(file) and not force_document: if isinstance(file_handle, use_cache):
# File was cached, so an instance of use_cache was returned
if as_image:
media = InputMediaPhoto(file_handle, caption)
else:
media = InputMediaDocument(file_handle, caption)
elif as_image:
media = InputMediaUploadedPhoto(file_handle, caption) media = InputMediaUploadedPhoto(file_handle, caption)
else: else:
mime_type = None mime_type = None
@ -964,19 +976,19 @@ class TelegramClient(TelegramBareClient):
media=media, media=media,
reply_to_msg_id=self._get_reply_to(reply_to) reply_to_msg_id=self._get_reply_to(reply_to)
) )
try: msg = self._get_response_message(request, self(request))
return self._get_response_message(request, self(request)) if msg and isinstance(file_handle, InputFile):
except FilePartMissingError: # There was a response message and we didn't use cached
# After a while, cached files are invalidated and this # version, so cache whatever we just sent to the database.
# error is raised. The file needs to be uploaded again. # Note that the InputFile was modified to have md5/size.
if not allow_cache: md5, size = file_handle.md5, file_handle.size
raise if as_image:
return self.send_file( to_cache = utils.get_input_photo(msg.media.photo)
entity, file, allow_cache=False, else:
caption=caption, force_document=force_document, to_cache = utils.get_input_document(msg.media.document)
progress_callback=progress_callback, reply_to=reply_to, self.session.cache_file(md5, size, to_cache)
attributes=attributes, thumb=thumb, **kwargs
) return msg
def send_voice_note(self, entity, file, caption='', progress_callback=None, def send_voice_note(self, entity, file, caption='', progress_callback=None,
reply_to=None): reply_to=None):
@ -987,48 +999,44 @@ class TelegramClient(TelegramBareClient):
is_voice_note=()) # empty tuple is enough is_voice_note=()) # empty tuple is enough
def _send_album(self, entity, files, caption='', def _send_album(self, entity, files, caption='',
progress_callback=None, reply_to=None, progress_callback=None, reply_to=None):
allow_cache=True):
"""Specialized version of .send_file for albums""" """Specialized version of .send_file for albums"""
# We don't care if the user wants to avoid cache, we will use it
# anyway. Why? The cached version will be exactly the same thing
# we need to produce right now to send albums (uploadMedia), and
# cache only makes a difference for documents where the user may
# want the attributes used on them to change. Caption's ignored.
entity = self.get_input_entity(entity) entity = self.get_input_entity(entity)
reply_to = self._get_reply_to(reply_to) reply_to = self._get_reply_to(reply_to)
try:
# Need to upload the media first # Need to upload the media first, but only if they're not cached yet
media = [ media = []
self(UploadMediaRequest(entity, InputMediaUploadedPhoto( for file in files:
self.upload_file(file, allow_cache=allow_cache), # fh will either be InputPhoto or a modified InputFile
caption=caption fh = self.upload_file(file, use_cache=InputPhoto)
))) if not isinstance(fh, InputPhoto):
for file in files input_photo = utils.get_input_photo(self(UploadMediaRequest(
] entity, media=InputMediaUploadedPhoto(fh, caption)
# Now we can construct the multi-media request )).photo)
result = self(SendMultiMediaRequest( self.session.cache_file(fh.md5, fh.size, input_photo)
entity, reply_to_msg_id=reply_to, multi_media=[ fh = input_photo
InputSingleMedia(InputMediaPhoto( media.append(InputSingleMedia(InputMediaPhoto(fh, caption)))
InputPhoto(m.photo.id, m.photo.access_hash),
caption=caption # Now we can construct the multi-media request
)) result = self(SendMultiMediaRequest(
for m in media entity, reply_to_msg_id=reply_to, multi_media=media
] ))
)) return [
return [ self._get_response_message(update.id, result)
self._get_response_message(update.id, result) for update in result.updates
for update in result.updates if isinstance(update, UpdateMessageID)
if isinstance(update, UpdateMessageID) ]
]
except FilePartMissingError:
if not allow_cache:
raise
return self._send_album(
entity, files, allow_cache=False, caption=caption,
progress_callback=progress_callback, reply_to=reply_to
)
def upload_file(self, def upload_file(self,
file, file,
part_size_kb=None, part_size_kb=None,
file_name=None, file_name=None,
allow_cache=True, use_cache=None,
progress_callback=None): progress_callback=None):
""" """
Uploads the specified file and returns a handle (an instance of Uploads the specified file and returns a handle (an instance of
@ -1058,15 +1066,20 @@ class TelegramClient(TelegramBareClient):
If not specified, the name will be taken from the ``file`` If not specified, the name will be taken from the ``file``
and if this is not a ``str``, it will be ``"unnamed"``. and if this is not a ``str``, it will be ``"unnamed"``.
allow_cache (:obj:`bool`, optional): use_cache (:obj:`type`, optional):
Whether to allow reusing the file from cache or not. Unused. The type of cache to use (currently either ``InputDocument``
or ``InputPhoto``). If present and the file is small enough
to need the MD5, it will be checked against the database,
and if a match is found, the upload won't be made. Instead,
an instance of type ``use_cache`` will be returned.
progress_callback (:obj:`callable`, optional): progress_callback (:obj:`callable`, optional):
A callback function accepting two parameters: A callback function accepting two parameters:
``(sent bytes, total)``. ``(sent bytes, total)``.
Returns: Returns:
The InputFile (or InputFileBig if >10MB). The InputFile (or InputFileBig if >10MB) with two extra
attributes: ``.md5`` (its ``.digest()``) and ``size``.
""" """
if isinstance(file, (InputFile, InputFileBig)): if isinstance(file, (InputFile, InputFileBig)):
return file # Already uploaded return file # Already uploaded
@ -1102,6 +1115,7 @@ class TelegramClient(TelegramBareClient):
# Determine whether the file is too big (over 10MB) or not # Determine whether the file is too big (over 10MB) or not
# Telegram does make a distinction between smaller or larger files # Telegram does make a distinction between smaller or larger files
is_large = file_size > 10 * 1024 * 1024 is_large = file_size > 10 * 1024 * 1024
hash_md5 = hashlib.md5()
if not is_large: if not is_large:
# Calculate the MD5 hash before anything else. # Calculate the MD5 hash before anything else.
# As this needs to be done always for small files, # As this needs to be done always for small files,
@ -1110,9 +1124,13 @@ class TelegramClient(TelegramBareClient):
if isinstance(file, str): if isinstance(file, str):
with open(file, 'rb') as stream: with open(file, 'rb') as stream:
file = stream.read() file = stream.read()
hash_md5 = hashlib.md5(file) hash_md5.update(file)
else: if use_cache:
hash_md5 = None cached = self.session.get_file(
hash_md5.digest(), file_size, cls=use_cache
)
if cached:
return cached
part_count = (file_size + part_size - 1) // part_size part_count = (file_size + part_size - 1) // part_size
__log__.info('Uploading file of %d bytes in %d chunks of %d', __log__.info('Uploading file of %d bytes in %d chunks of %d',
@ -1143,10 +1161,14 @@ class TelegramClient(TelegramBareClient):
'Failed to upload file part {}.'.format(part_index)) 'Failed to upload file part {}.'.format(part_index))
if is_large: if is_large:
return InputFileBig(file_id, part_count, file_name) result = InputFileBig(file_id, part_count, file_name)
else: else:
return InputFile(file_id, part_count, file_name, result = InputFile(file_id, part_count, file_name,
md5_checksum=hash_md5.hexdigest()) md5_checksum=hash_md5.hexdigest())
result.md5 = hash_md5.digest()
result.size = file_size
return result
# endregion # endregion

View File

@ -457,7 +457,7 @@ class Session:
with self._db_lock: with self._db_lock:
self._conn.execute( self._conn.execute(
'insert into sent_files values (?,?,?,?,?)', ( 'insert or replace into sent_files values (?,?,?,?,?)', (
md5_digest, file_size, md5_digest, file_size,
_SentFileType.from_type(type(instance)).value, _SentFileType.from_type(type(instance)).value,
instance.id, instance.access_hash instance.id, instance.access_hash