From 76c7217000cd9d7e9adaed1ca90ed36ab1966703 Mon Sep 17 00:00:00 2001
From: Lonami Exo <totufals@hotmail.com>
Date: Wed, 1 Aug 2018 00:37:25 +0200
Subject: [PATCH] Support downloading web documents

---
 telethon/client/downloads.py | 74 ++++++++++++++++++++++++++++++------
 telethon/utils.py            |  3 +-
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/telethon/client/downloads.py b/telethon/client/downloads.py
index abf7f7c9..85d3375c 100644
--- a/telethon/client/downloads.py
+++ b/telethon/client/downloads.py
@@ -8,6 +8,12 @@ from .users import UserMethods
 from .. import utils, helpers, errors
 from ..tl import TLObject, types, functions
 
+try:
+    import aiohttp
+except ImportError:
+    aiohttp = None
+
+
 __log__ = logging.getLogger(__name__)
 
 
@@ -140,6 +146,10 @@ class DownloadMethods(UserMethods):
             return self._download_contact(
                 media, file
             )
+        elif isinstance(media, (types.WebDocument, types.WebDocumentNoProxy)):
+            return await self._download_web_document(
+                media, file, progress_callback
+            )
 
     async def download_file(
             self, input_location, file=None, *, part_size_kb=None,
@@ -298,19 +308,12 @@ class DownloadMethods(UserMethods):
             progress_callback=progress_callback)
         return file
 
-    async def _download_document(
-            self, document, file, date, progress_callback):
-        """Specialized version of .download_media() for documents."""
-        if isinstance(document, types.MessageMediaDocument):
-            document = document.document
-        if not isinstance(document, types.Document):
-            return
-
-        file_size = document.size
-
+    @staticmethod
+    def _get_kind_and_names(attributes):
+        """Gets kind and possible names for :tl:`DocumentAttribute`."""
         kind = 'document'
         possible_names = []
-        for attr in document.attributes:
+        for attr in attributes:
             if isinstance(attr, types.DocumentAttributeFilename):
                 possible_names.insert(0, attr.file_name)
 
@@ -327,13 +330,24 @@ class DownloadMethods(UserMethods):
                 elif attr.voice:
                     kind = 'voice'
 
+        return kind, possible_names
+
+    async def _download_document(
+            self, document, file, date, progress_callback):
+        """Specialized version of .download_media() for documents."""
+        if isinstance(document, types.MessageMediaDocument):
+            document = document.document
+        if not isinstance(document, types.Document):
+            return
+
+        kind, possible_names = self._get_kind_and_names(document.attributes)
         file = self._get_proper_filename(
             file, kind, utils.get_extension(document),
             date=date, possible_names=possible_names
         )
 
         await self.download_file(
-            document, file, file_size=file_size,
+            document, file, file_size=document.size,
             progress_callback=progress_callback)
         return file
 
@@ -373,6 +387,42 @@ class DownloadMethods(UserMethods):
 
         return file
 
+    @classmethod
+    async def _download_web_document(cls, web, file, progress_callback):
+        """Specialized version of .download_media() for web documents.
+
+        Streams ``web.url`` with aiohttp into `file` (a path or an object
+        with ``write``) and returns `file`, as `_download_document` does.
+        """
+        if not aiohttp:
+            raise ValueError(
+                'Cannot download web documents without the aiohttp '
+                'dependency install it (pip install aiohttp)'
+            )
+
+        # TODO Better way to get opened handles of files and auto-close
+        f = file
+        if isinstance(file, str):
+            kind, possible_names = cls._get_kind_and_names(web.attributes)
+            file = cls._get_proper_filename(
+                file, kind, utils.get_extension(web),
+                possible_names=possible_names)
+            f = open(file, 'wb')
+
+        try:
+            # aiohttp sessions can only be entered with ``async with``
+            async with aiohttp.ClientSession() as session:
+                # TODO Use progress_callback; get content length from response
+                # https://github.com/telegramdesktop/tdesktop/blob/c7e773dd9aeba94e2be48c032edc9a78bb50234e/Telegram/SourceFiles/ui/images.cpp#L1318-L1319
+                async with session.get(web.url) as response:
+                    async for chunk in response.content.iter_chunked(
+                            128 * 1024):
+                        f.write(chunk)
+        finally:
+            if isinstance(file, str):
+                f.close()
+        return file
+
     @staticmethod
     def _get_proper_filename(file, kind, extension,
                              date=None, possible_names=None):
diff --git a/telethon/utils.py b/telethon/utils.py
index ee123b32..81bbe11d 100644
--- a/telethon/utils.py
+++ b/telethon/utils.py
@@ -92,7 +92,8 @@ def get_extension(media):
     # Documents will come with a mime type
     if isinstance(media, types.MessageMediaDocument):
         media = media.document
-    if isinstance(media, types.Document):
+    if isinstance(media, (
+            types.Document, types.WebDocument, types.WebDocumentNoProxy)):
         if media.mime_type == 'application/octet-stream':
             # Octet stream are just bytes, which have no default extension
             return ''