From 76c7217000cd9d7e9adaed1ca90ed36ab1966703 Mon Sep 17 00:00:00 2001
From: Lonami Exo
Date: Wed, 1 Aug 2018 00:37:25 +0200
Subject: [PATCH] Support downloading web documents

---
 telethon/client/downloads.py | 81 ++++++++++++++++++++++++++++++------
 telethon/utils.py            |  3 +-
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/telethon/client/downloads.py b/telethon/client/downloads.py
index abf7f7c9..85d3375c 100644
--- a/telethon/client/downloads.py
+++ b/telethon/client/downloads.py
@@ -8,6 +8,12 @@ from .users import UserMethods
 from .. import utils, helpers, errors
 from ..tl import TLObject, types, functions
 
+try:
+    import aiohttp
+except ImportError:
+    aiohttp = None
+
+
 __log__ = logging.getLogger(__name__)
 
 
@@ -140,6 +146,10 @@ class DownloadMethods(UserMethods):
             return self._download_contact(
                 media, file
             )
+        elif isinstance(media, (types.WebDocument, types.WebDocumentNoProxy)):
+            return await self._download_web_document(
+                media, file, progress_callback
+            )
 
     async def download_file(
             self, input_location, file=None, *, part_size_kb=None,
@@ -298,19 +308,12 @@ class DownloadMethods(UserMethods):
             progress_callback=progress_callback)
         return file
 
-    async def _download_document(
-            self, document, file, date, progress_callback):
-        """Specialized version of .download_media() for documents."""
-        if isinstance(document, types.MessageMediaDocument):
-            document = document.document
-        if not isinstance(document, types.Document):
-            return
-
-        file_size = document.size
-
+    @staticmethod
+    def _get_kind_and_names(attributes):
+        """Gets kind and possible names for :tl:`DocumentAttribute`."""
         kind = 'document'
         possible_names = []
-        for attr in document.attributes:
+        for attr in attributes:
             if isinstance(attr, types.DocumentAttributeFilename):
                 possible_names.insert(0, attr.file_name)
 
@@ -327,13 +330,24 @@ class DownloadMethods(UserMethods):
                 elif attr.voice:
                     kind = 'voice'
 
+        return kind, possible_names
+
+    async def _download_document(
+            self, document, file, date, progress_callback):
+        """Specialized version of .download_media() for documents."""
+        if isinstance(document, types.MessageMediaDocument):
+            document = document.document
+        if not isinstance(document, types.Document):
+            return
+
+        kind, possible_names = self._get_kind_and_names(document.attributes)
         file = self._get_proper_filename(
             file, kind, utils.get_extension(document),
             date=date, possible_names=possible_names
         )
 
         await self.download_file(
-            document, file, file_size=file_size,
+            document, file, file_size=document.size,
             progress_callback=progress_callback)
         return file
 
@@ -373,6 +387,49 @@ class DownloadMethods(UserMethods):
 
         return file
 
+    @classmethod
+    async def _download_web_document(cls, web, file, progress_callback):
+        """
+        Specialized version of .download_media() for web documents.
+
+        Streams ``web.url`` with ``aiohttp`` into ``file`` and returns
+        ``file`` (as resolved by ``_get_proper_filename`` when a ``str``
+        was given). Raises ``ValueError`` if ``aiohttp`` is not installed.
+        """
+        if not aiohttp:
+            raise ValueError(
+                'Cannot download web documents without the aiohttp '
+                'dependency install it (pip install aiohttp)'
+            )
+
+        # TODO Better way to get opened handles of files and auto-close
+        if isinstance(file, str):
+            kind, possible_names = cls._get_kind_and_names(web.attributes)
+            file = cls._get_proper_filename(
+                file, kind, utils.get_extension(web),
+                possible_names=possible_names
+            )
+            f = open(file, 'wb')
+        else:
+            f = file
+
+        try:
+            # ClientSession is an asynchronous context manager only.
+            async with aiohttp.ClientSession() as session:
+                # TODO Use progress_callback; get content length from response
+                # https://github.com/telegramdesktop/tdesktop/blob/c7e773dd9aeba94e2be48c032edc9a78bb50234e/Telegram/SourceFiles/ui/images.cpp#L1318-L1319
+                async with session.get(web.url) as response:
+                    while True:
+                        chunk = await response.content.read(128 * 1024)
+                        if not chunk:
+                            break
+                        f.write(chunk)
+        finally:
+            if isinstance(file, str):
+                f.close()
+
+        return file
+
     @staticmethod
     def _get_proper_filename(file, kind, extension,
                              date=None, possible_names=None):
diff --git a/telethon/utils.py b/telethon/utils.py
index ee123b32..81bbe11d 100644
--- a/telethon/utils.py
+++ b/telethon/utils.py
@@ -92,7 +92,8 @@ def get_extension(media):
     # Documents will come with a mime type
     if isinstance(media, types.MessageMediaDocument):
         media = media.document
-    if isinstance(media, types.Document):
+    if isinstance(media, (
+            types.Document, types.WebDocument, types.WebDocumentNoProxy)):
         if media.mime_type == 'application/octet-stream':
             # Octet stream are just bytes, which have no default extension
             return ''