Be more efficient with Media.loaded_metadata

This commit is contained in:
tcely 2025-02-23 17:47:54 -05:00 committed by GitHub
parent 1f0169ae4b
commit be0cbd6cd0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -547,6 +547,9 @@ class Media(models.Model):
Source. Source.
''' '''
# Used to convert seconds to datetime
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
# Format to use to display a URL for the media # Format to use to display a URL for the media
URLS = _srctype_dict('https://www.youtube.com/watch?v={key}') URLS = _srctype_dict('https://www.youtube.com/watch?v={key}')
@ -771,6 +774,7 @@ class Media(models.Model):
def save(self, force_insert=False, force_update=False, using=None, update_fields=None): def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
# Trigger an update of derived fields from metadata # Trigger an update of derived fields from metadata
if self.metadata: if self.metadata:
setattr(self, '_cached_metadata_dict', None)
self.title = self.metadata_title[:200] self.title = self.metadata_title[:200]
self.duration = self.metadata_duration self.duration = self.metadata_duration
if update_fields is not None and "metadata" in update_fields: if update_fields is not None and "metadata" in update_fields:
@ -1000,20 +1004,28 @@ class Media(models.Model):
@property @property
def reduce_data(self): def reduce_data(self):
try: try:
from common.logger import log
from common.utils import json_serial
old_mdl = len(self.metadata or "")
data = json.loads(self.metadata or "{}") data = json.loads(self.metadata or "{}")
if '_reduce_data_ran_at' in data.keys():
total_seconds = data['_reduce_data_ran_at']
ran_at = posix_epoch + timedelta(seconds=total_seconds)
if (timezone.now() - ran_at) < timedelta(hours=1):
return data
from common.utils import json_serial
compact_json = json.dumps(data, separators=(',', ':'), default=json_serial) compact_json = json.dumps(data, separators=(',', ':'), default=json_serial)
filtered_data = filter_response(data, True) filtered_data = filter_response(data, True)
filtered_data['_reduce_data_ran_at'] = round((timezone.now() - posix_epoch).total_seconds())
filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial) filtered_json = json.dumps(filtered_data, separators=(',', ':'), default=json_serial)
except Exception as e: except Exception as e:
from common.logger import log
log.exception('reduce_data: %s', e) log.exception('reduce_data: %s', e)
else: else:
from common.logger import log
log.debug(f'reduce_data: running for: {self.source.name} / {self.key}')
# log the results of filtering / compacting on metadata size # log the results of filtering / compacting on metadata size
new_mdl = len(compact_json) new_mdl = len(compact_json)
old_mdl = len(self.metadata or "")
if old_mdl > new_mdl: if old_mdl > new_mdl:
delta = old_mdl - new_mdl delta = old_mdl - new_mdl
log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') log.info(f'{self.key}: metadata compacted by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
@ -1023,16 +1035,24 @@ class Media(models.Model):
log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})') log.info(f'{self.key}: metadata reduced by {delta:,} characters ({old_mdl:,} -> {new_mdl:,})')
if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False): if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
self.metadata = filtered_json self.metadata = filtered_json
return filtered_data
@property
def loaded_metadata(self):
    '''
        Return this media item's metadata as a dict.

        When the SHRINK_OLD_MEDIA_METADATA setting is truthy, the value
        of the reduce_data property is used if it is truthy. Otherwise a
        truthy '_cached_metadata_dict' attribute is reused, and only when
        neither is available is the JSON text in self.metadata parsed.

        An empty dict is returned when the result is not a dict, or when
        any exception is raised while loading.
    '''
    parsed = None
    if getattr(settings, 'SHRINK_OLD_MEDIA_METADATA', False):
        parsed = self.reduce_data
    try:
        if not parsed:
            # A falsy cache entry (missing, None, or an empty dict) is
            # ignored, so the stored JSON is parsed again in that case.
            parsed = (
                getattr(self, '_cached_metadata_dict', None)
                or json.loads(self.metadata or "{}")
            )
        if not isinstance(parsed, dict):
            return {}
        # Remember the parsed dict so later accesses can skip json.loads
        self._cached_metadata_dict = parsed
    except Exception:
        # Best effort: unreadable metadata is treated as no metadata
        return {}
    return parsed
@ -1100,7 +1120,6 @@ class Media(models.Model):
if timestamp is not None: if timestamp is not None:
try: try:
timestamp_float = float(timestamp) timestamp_float = float(timestamp)
posix_epoch = datetime(1970, 1, 1, tzinfo=tz.utc)
published_dt = posix_epoch + timedelta(seconds=timestamp_float) published_dt = posix_epoch + timedelta(seconds=timestamp_float)
except Exception as e: except Exception as e:
log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"') log.warn(f'Could not compute published from timestamp for: {self.source} / {self} with "{e}"')