Replace TLObject.on_send with the new .to_bytes()

This also replaces some int.to_bytes() calls with the faster
struct.pack (up to 4x faster). This approach is also around
3x faster than creating a BinaryWriter just to serialize a
TLObject as bytes.
This commit is contained in:
Lonami Exo 2017-09-26 14:36:02 +02:00
parent 2bb26d6389
commit b83cd98ba0
5 changed files with 110 additions and 64 deletions

View File

@ -110,7 +110,7 @@ class BinaryWriter:
def tgwrite_object(self, tlobject): def tgwrite_object(self, tlobject):
"""Writes a Telegram object""" """Writes a Telegram object"""
tlobject.on_send(self) self.write(tlobject.to_bytes())
def tgwrite_vector(self, vector): def tgwrite_vector(self, vector):
"""Writes a vector of Telegram objects""" """Writes a vector of Telegram objects"""

View File

@ -71,19 +71,14 @@ class MtProtoSender:
else: else:
request = MessageContainer(self.session, requests) request = MessageContainer(self.session, requests)
with BinaryWriter() as writer: self._send_packet(request.to_bytes(), request)
request.on_send(writer) self._pending_receive.append(request)
self._send_packet(writer.get_bytes(), request)
self._pending_receive.append(request)
def _send_acknowledges(self): def _send_acknowledges(self):
"""Sends a messages acknowledge for all those who _need_confirmation""" """Sends a messages acknowledge for all those who _need_confirmation"""
if self._need_confirmation: if self._need_confirmation:
msgs_ack = MsgsAck(self._need_confirmation) msgs_ack = MsgsAck(self._need_confirmation)
with BinaryWriter() as writer: self._send_packet(msgs_ack.to_bytes(), msgs_ack)
msgs_ack.on_send(writer)
self._send_packet(writer.get_bytes(), msgs_ack)
del self._need_confirmation[:] del self._need_confirmation[:]
def receive(self, update_state): def receive(self, update_state):

View File

@ -18,17 +18,21 @@ class MessageContainer(TLObject):
writer.write_int(0x73f1f8dc, signed=False) writer.write_int(0x73f1f8dc, signed=False)
writer.write_int(len(self.requests)) writer.write_int(len(self.requests))
for x in self.requests: for x in self.requests:
with BinaryWriter() as aux: x.request_msg_id = self.session.get_new_msg_id()
x.on_send(aux)
x.request_msg_id = self.session.get_new_msg_id()
writer.write_long(x.request_msg_id) writer.write_long(x.request_msg_id)
writer.write_int( writer.write_int(
self.session.generate_sequence(x.content_related) self.session.generate_sequence(x.content_related)
) )
packet = aux.get_bytes() packet = x.to_bytes()
writer.write_int(len(packet)) writer.write_int(len(packet))
writer.write(packet) writer.write(packet)
def to_bytes(self):
    """
    Serializes this container into bytes.

    Runs on_send() against a temporary BinaryWriter and returns
    whatever bytes that writer reports through get_bytes().
    """
    # TODO Change this to delete the on_send from this class
    # NOTE(review): indentation was restored from a flattened diff; the
    # return is kept inside the with-block so the bytes are read while
    # the writer is still open -- confirm against the real file.
    with BinaryWriter() as writer:
        self.on_send(writer)
        return writer.get_bytes()
@staticmethod @staticmethod
def iter_read(reader): def iter_read(reader):

View File

@ -84,12 +84,42 @@ class TLObject:
return ''.join(result) return ''.join(result)
@staticmethod
def serialize_bytes(data):
    """
    Serializes a bytes-like object following the Telegram (TL) rules.

    Payloads shorter than 254 bytes are prefixed with a single length
    byte. Longer payloads are prefixed with the marker byte 254 followed
    by the length as a 3-byte little-endian integer. The result is then
    zero-padded so that its total size is a multiple of 4.

    :param data: the bytes-like payload to serialize.
    :return: the serialized payload as bytes.
    :raises ValueError: if the payload is too long for its length to
                        fit in the 3-byte prefix (2**24 bytes or more).
                        Previously such lengths were silently truncated,
                        producing a corrupt stream.
    """
    length = len(data)
    if length < 254:
        header = bytes([length])
    elif length < (1 << 24):
        header = b'\xfe' + length.to_bytes(3, 'little')
    else:
        raise ValueError(
            'Cannot serialize {} bytes; the length must fit in 3 bytes'
            .format(length)
        )

    # Header and payload together must be aligned to 4 bytes
    padding = -(len(header) + length) % 4
    return b''.join((header, data, bytes(padding)))
@staticmethod
def serialize_string(string):
    """Encodes the string as UTF-8 and serializes the resulting bytes."""
    encoded = string.encode('utf-8')
    return TLObject.serialize_bytes(encoded)
# These should be overridden # These should be overridden
def to_dict(self, recursive=True): def to_dict(self, recursive=True):
return {} return {}
def on_send(self, writer): def to_bytes(self):
pass return b''
def on_response(self, reader): def on_response(self, reader):
pass pass

View File

@ -1,6 +1,7 @@
import os import os
import re import re
import shutil import shutil
import struct
from zlib import crc32 from zlib import crc32
from collections import defaultdict from collections import defaultdict
@ -150,6 +151,9 @@ class TLGenerator:
# for all those TLObjects with arg.can_be_inferred. # for all those TLObjects with arg.can_be_inferred.
builder.writeln('import os') builder.writeln('import os')
# Import struct for the .to_bytes(self) serialization
builder.writeln('import struct')
# Generate the class for every TLObject # Generate the class for every TLObject
for t in sorted(tlobjects, key=lambda x: x.name): for t in sorted(tlobjects, key=lambda x: x.name):
TLGenerator._write_source_code( TLGenerator._write_source_code(
@ -294,16 +298,18 @@ class TLGenerator:
builder.end_block() builder.end_block()
# Write the on_send(self, writer) function # Write the .to_bytes() function
builder.writeln('def on_send(self, writer):') builder.writeln('def to_bytes(self):')
builder.writeln( builder.write("return b''.join((")
'writer.write_int({}.constructor_id, signed=False)'
.format(tlobject.class_name()) # First constructor code, we already know its bytes
) builder.write('{},'.format(repr(struct.pack('<I', tlobject.id))))
for arg in tlobject.args: for arg in tlobject.args:
TLGenerator.write_onsend_code(builder, arg, if TLGenerator.write_to_bytes(builder, arg, tlobject.args):
tlobject.args) builder.write(',')
builder.writeln('))')
builder.end_block() builder.end_block()
# Write the empty() function, which returns an "empty" # Write the empty() function, which returns an "empty"
@ -409,18 +415,17 @@ class TLGenerator:
return result return result
@staticmethod @staticmethod
def write_onsend_code(builder, arg, args, name=None): def write_to_bytes(builder, arg, args, name=None):
""" """
Writes the write code for the given argument Writes the .to_bytes() code for the given argument
:param builder: The source code builder :param builder: The source code builder
:param arg: The argument to write :param arg: The argument to write
:param args: All the other arguments in TLObject same on_send. :param args: All the other arguments in TLObject same to_bytes.
This is required to determine the flags value This is required to determine the flags value
:param name: The name of the argument. Defaults to "self.argname" :param name: The name of the argument. Defaults to "self.argname"
This argument is an option because it's required when This argument is an option because it's required when
writing Vectors<> writing Vectors<>
""" """
if arg.generic_definition: if arg.generic_definition:
return # Do nothing, this only specifies a later type return # Do nothing, this only specifies a later type
@ -434,73 +439,85 @@ class TLGenerator:
if arg.is_flag: if arg.is_flag:
if arg.type == 'true': if arg.type == 'true':
return # Exit, since True type is never written return # Exit, since True type is never written
elif arg.is_vector:
# Vector flags are special since they consist of 3 values,
# so we need an extra join here. Note that empty vector flags
# should NOT be sent either!
builder.write("b'' if not {} else b''.join((".format(name))
else: else:
builder.writeln('if {}:'.format(name)) builder.write("b'' if not {} else (".format(name))
if arg.is_vector: if arg.is_vector:
if arg.use_vector_id: if arg.use_vector_id:
builder.writeln('writer.write_int(0x1cb5c415, signed=False)') # vector code, unsigned 0x1cb5c415 as little endian
builder.write(r"b'\x15\xc4\xb5\x1c',")
builder.write("struct.pack('<i', len({})),".format(name))
# Unpack the values for the outer tuple
builder.write('*[(')
builder.writeln('writer.write_int(len({}))'.format(name))
builder.writeln('for _x in {}:'.format(name))
# Temporary disable .is_vector, not to enter this if again # Temporary disable .is_vector, not to enter this if again
arg.is_vector = False # Also disable .is_flag since it's not needed per element
TLGenerator.write_onsend_code(builder, arg, args, name='_x') old_flag = arg.is_flag
arg.is_vector = arg.is_flag = False
TLGenerator.write_to_bytes(builder, arg, args, name='x')
arg.is_vector = True arg.is_vector = True
arg.is_flag = old_flag
builder.write(') for x in {}]'.format(name))
elif arg.flag_indicator: elif arg.flag_indicator:
# Calculate the flags with those items which are not None # Calculate the flags with those items which are not None
builder.writeln('flags = 0') builder.write("struct.pack('<I', {})".format(
for flag in args: ' | '.join('(1 << {} if {} else 0)'.format(
if flag.is_flag: flag.flag_index, 'self.{}'.format(flag.name)
builder.writeln('flags |= (1 << {}) if {} else 0'.format( ) for flag in args if flag.is_flag)
flag.flag_index, 'self.{}'.format(flag.name))) ))
builder.writeln('writer.write_int(flags)')
builder.writeln()
elif 'int' == arg.type: elif 'int' == arg.type:
builder.writeln('writer.write_int({})'.format(name)) # struct.pack is around 4 times faster than int.to_bytes
builder.write("struct.pack('<i', {})".format(name))
elif 'long' == arg.type: elif 'long' == arg.type:
builder.writeln('writer.write_long({})'.format(name)) builder.write("struct.pack('<q', {})".format(name))
elif 'int128' == arg.type: elif 'int128' == arg.type:
builder.writeln('writer.write_large_int({}, bits=128)'.format( builder.write("int.to_bytes({}, 16, 'little', signed=True)")
name))
elif 'int256' == arg.type: elif 'int256' == arg.type:
builder.writeln('writer.write_large_int({}, bits=256)'.format( builder.write("int.to_bytes({}, 32, 'little', signed=True)")
name))
elif 'double' == arg.type: elif 'double' == arg.type:
builder.writeln('writer.write_double({})'.format(name)) builder.write("struct.pack('<d', {})".format(name))
elif 'string' == arg.type: elif 'string' == arg.type:
builder.writeln('writer.tgwrite_string({})'.format(name)) builder.write('TLObject.serialize_string({})'.format(name))
elif 'Bool' == arg.type: elif 'Bool' == arg.type:
builder.writeln('writer.tgwrite_bool({})'.format(name)) # 0x997275b5 if boolean else 0xbc799737
builder.write(r"b'\xb5ur\x99' if {} else b'7\x97y\xbc'")
elif 'true' == arg.type: elif 'true' == arg.type:
pass # These are actually NOT written! Only used for flags pass # These are actually NOT written! Only used for flags
elif 'bytes' == arg.type: elif 'bytes' == arg.type:
builder.writeln('writer.tgwrite_bytes({})'.format(name)) builder.write('TLObject.serialize_bytes({})'.format(name))
elif 'date' == arg.type: # Custom format elif 'date' == arg.type: # Custom format
builder.writeln('writer.tgwrite_date({})'.format(name)) # 0 if datetime is None else int(datetime.timestamp())
builder.write(r"b'\0\0\0\0' if {0} is None else struct.pack('<I', int({0}.timestamp()))".format(name))
else: else:
# Else it may be a custom type # Else it may be a custom type
builder.writeln('{}.on_send(writer)'.format(name)) builder.write('{}.to_bytes()'.format(name))
# End vector and flag blocks if required (if we opened them before)
if arg.is_vector:
builder.end_block()
if arg.is_flag: if arg.is_flag:
builder.end_block() builder.write(')')
if arg.is_vector:
builder.write(')') # We were using a tuple
return True # Something was written
@staticmethod @staticmethod
def write_onresponse_code(builder, arg, args, name=None): def write_onresponse_code(builder, arg, args, name=None):