diff --git a/setup.py b/setup.py index 8ac426f5..74aa8a57 100755 --- a/setup.py +++ b/setup.py @@ -15,7 +15,6 @@ import json import os import re import shutil -from codecs import open from sys import argv from setuptools import find_packages, setup @@ -40,11 +39,10 @@ class TempWorkDir: GENERATOR_DIR = 'telethon_generator' LIBRARY_DIR = 'telethon' -ERRORS_IN_JSON = os.path.join(GENERATOR_DIR, 'data', 'errors.json') -ERRORS_IN_DESC = os.path.join(GENERATOR_DIR, 'data', 'error_descriptions') +ERRORS_IN = os.path.join(GENERATOR_DIR, 'data', 'errors.csv') ERRORS_OUT = os.path.join(LIBRARY_DIR, 'errors', 'rpcerrorlist.py') -INVALID_BM_IN = os.path.join(GENERATOR_DIR, 'data', 'invalid_bot_methods.json') +METHODS_IN = os.path.join(GENERATOR_DIR, 'data', 'methods.csv') TLOBJECT_IN_CORE_TL = os.path.join(GENERATOR_DIR, 'data', 'mtproto_api.tl') TLOBJECT_IN_TL = os.path.join(GENERATOR_DIR, 'data', 'telegram_api.tl') @@ -56,16 +54,19 @@ DOCS_OUT = 'docs' def generate(which): - from telethon_generator.parsers import parse_errors, parse_tl, find_layer + # TODO make docs generator use the new CSV too + from telethon_generator.parsers import\ + parse_errors, parse_methods, parse_tl, find_layer + from telethon_generator.generators import\ generate_errors, generate_tlobjects, generate_docs, clean_tlobjects - # Older Python versions open the file as bytes instead (3.4.2) - with open(INVALID_BM_IN, 'r') as f: - invalid_bot_methods = set(json.load(f)) - layer = find_layer(TLOBJECT_IN_TL) - errors = list(parse_errors(ERRORS_IN_JSON, ERRORS_IN_DESC)) + errors = list(parse_errors(ERRORS_IN)) + methods = list(parse_methods(METHODS_IN, {e.str_code: e for e in errors})) + invalid_bot_methods = {m.name for m in methods + if not m.usability.startswith('bot')} + tlobjects = list(itertools.chain( parse_tl(TLOBJECT_IN_CORE_TL, layer, invalid_bot_methods), parse_tl(TLOBJECT_IN_TL, layer, invalid_bot_methods))) diff --git a/telethon_generator/generators/docs.py 
b/telethon_generator/generators/docs.py index 836ae89f..08647ff0 100755 --- a/telethon_generator/generators/docs.py +++ b/telethon_generator/generators/docs.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import csv import functools import os import re @@ -9,7 +10,6 @@ from ..docswriter import DocsWriter from ..parsers import TLObject from ..utils import snake_to_camel_case - CORE_TYPES = { 'int', 'long', 'int128', 'int256', 'double', 'vector', 'string', 'bool', 'true', 'bytes', 'date' diff --git a/telethon_generator/parsers/__init__.py b/telethon_generator/parsers/__init__.py index 9034450e..562ed88c 100644 --- a/telethon_generator/parsers/__init__.py +++ b/telethon_generator/parsers/__init__.py @@ -1,2 +1,3 @@ from .errors import Error, parse_errors +from .methods import MethodInfo, parse_methods from .tlobject import TLObject, parse_tl, find_layer diff --git a/telethon_generator/parsers/errors.py b/telethon_generator/parsers/errors.py index 5ceedb3e..f4d99dc2 100644 --- a/telethon_generator/parsers/errors.py +++ b/telethon_generator/parsers/errors.py @@ -1,6 +1,4 @@ -import json -import re -from collections import defaultdict +import csv from ..utils import snake_to_camel_case @@ -16,15 +14,8 @@ KNOWN_BASE_CLASSES = { 500: 'ServerError', } -# The API doesn't return the code for some (vital) errors. They are -# all assumed to be 400, except these well-known ones that aren't. -KNOWN_CODES = { - 'ACTIVE_USER_REQUIRED': 401, - 'AUTH_KEY_UNREGISTERED': 401, - 'USER_DEACTIVATED': 401 -} - # Give better semantic names to some captures +# TODO Move this to the CSV? 
# Give better semantic names to some captures
# TODO Move this to the CSV?
# NOTE(review): the diff hunk truncates this dict — only the entries
# visible in the patch are reproduced here; confirm the elided remainder
# against the full file before relying on it.
CAPTURE_NAMES = {
    'FloodWaitError': 'seconds',
    'FloodTestPhoneWaitError': 'seconds',
}


class Error:
    def __init__(self, codes, name, description):
        """
        One RPC error definition.

        :param codes: non-empty list of integer error codes; the first
                      one decides the generated parent class.
        :param name: upper-snake-case error name; a literal ``_X``
                     marks a captured numeric value (e.g. FLOOD_WAIT_X).
        :param description: human-readable description of the error.
        """
        # TODO Some errors have the same name but different integer codes
        # Should these be split into different files or doesn't really matter?
        # Telegram isn't exactly consistent with returned errors anyway.
        self.int_code = codes[0]
        self.str_code = name
        self.subclass = _get_class_name(codes[0])
        self.subclass_exists = codes[0] in KNOWN_BASE_CLASSES
        self.description = description

        self.has_captures = '_X' in name
        if self.has_captures:
            # Drop the _X placeholder for the class name, and turn it
            # into a capturing regex group for matching live errors.
            self.name = _get_class_name(name.replace('_X', ''))
            self.pattern = name.replace('_X', r'_(\d+)')
            self.capture_name = CAPTURE_NAMES.get(self.name, 'x')
        else:
            self.name = _get_class_name(name)
            self.pattern = name
            self.capture_name = None


def parse_errors(csv_file):
    """
    Parses the input CSV file with columns (name, error codes, description)
    and yields `Error` instances as a result.

    :param csv_file: path to the CSV file to read.
    :raises ValueError: if a codes cell contains a non-integer token
                        (the offending line number is in the message).
    """
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip header
        # start=2 because the header occupied line 1
        for line, (name, codes, description) in enumerate(reader, start=2):
            try:
                # Codes are space-separated; assume 400 when absent.
                codes = [int(x) for x in codes.split()] or [400]
            except ValueError:
                raise ValueError('Not all codes are integers '
                                 '(line {})'.format(line)) from None

            # `codes` is already a list of int here — the original
            # re-converted it a second time for no reason.
            yield Error(codes, name, description)
import csv


class MethodInfo:
    """Plain container for one parsed row of methods.csv."""
    def __init__(self, name, usability, errors):
        # Fully-qualified raw API method name (e.g. "messages.sendMessage").
        self.name = name
        # Who may invoke the method: 'user', 'bot', 'both' or 'unknown'.
        self.usability = usability
        # List of `Error` instances this method is known to raise.
        self.errors = errors


def parse_methods(csv_file, errors_dict):
    """
    Parses the input CSV file with columns (method, usability, errors)
    and yields `MethodInfo` instances as a result.

    :param csv_file: path to the CSV file to read.
    :param errors_dict: maps error string codes to `Error` instances.
    :raises ValueError: on an unknown usability value, or when a row
                        references an error code missing from
                        ``errors_dict`` (the missing code and the CSV
                        line number are included in the message).
    """
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip header
        # start=2 because the header occupied line 1
        for line, (method, usability, errors) in enumerate(reader, start=2):
            if usability not in ('user', 'bot', 'both', 'unknown'):
                raise ValueError('Usability must be either user, bot, '
                                 'both or unknown, not {}'.format(usability))
            try:
                # Errors are space-separated string codes.
                errors = [errors_dict[x] for x in errors.split()]
            except KeyError as e:
                # Report the actual missing code: the original formatted
                # the raw `errors` string, blaming every referenced error.
                raise ValueError('Method {} references unknown error {} '
                                 '(line {})'.format(method, e.args[0], line)
                                 ) from None

            yield MethodInfo(method, usability, errors)