Reduce the amount of time the database is locked

This commit is contained in:
tcely 2025-03-17 09:50:15 -04:00 committed by GitHub
parent cda93f0df2
commit deed3a3393
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -181,7 +181,6 @@ def cleanup_removed_media(source, videos):
@background(schedule=300, remove_existing_tasks=True)
@atomic(durable=True)
def index_source_task(source_id):
'''
Indexes media available from a Source object.
@ -206,51 +205,54 @@ def index_source_task(source_id):
source.save()
log.info(f'Found {len(videos)} media items for source: {source}')
fields = lambda f, m: m.get_metadata_field(f)
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
if not key:
# Video has no unique key (ID), it can't be indexed
continue
try:
media = Media.objects.get(key=key, source=source)
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), None) or 0) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
published_dt = media.metadata_published(timestamp)
if published_dt is not None:
media.published = published_dt
try:
with atomic():
media.save()
log.debug(f'Indexed media: {source} / {media}')
# log the new media instances
new_media_instance = (
media.created and
source.last_crawl and
media.created >= source.last_crawl
)
if new_media_instance:
log.info(f'Indexed new media: {source} / {media}')
log.info(f'Scheduling task to download metadata for: {media.url}')
verbose_name = _('Downloading metadata for "{}"')
download_media_metadata(
str(media.pk),
priority=9,
verbose_name=verbose_name.format(media.pk),
with atomic(durable=True):
for video in videos:
# Create or update each video as a Media object
key = video.get(source.key_field, None)
if not key:
# Video has no unique key (ID), it can't be indexed
continue
try:
media = Media.objects.get(key=key, source=source)
except Media.DoesNotExist:
media = Media(key=key)
media.source = source
media.duration = float(video.get(fields('duration', media), None) or 0) or None
media.title = str(video.get(fields('title', media), ''))[:200]
timestamp = video.get(fields('timestamp', media), None)
published_dt = media.metadata_published(timestamp)
if published_dt is not None:
media.published = published_dt
try:
with atomic():
media.save()
except IntegrityError as e:
log.error(f'Index media failed: {source} / {media} with "{e}"')
else:
log.debug(f'Indexed media: {source} / {media}')
# log the new media instances
new_media_instance = (
media.created and
source.last_crawl and
media.created >= source.last_crawl
)
except IntegrityError as e:
log.error(f'Index media failed: {source} / {media} with "{e}"')
if new_media_instance:
log.info(f'Indexed new media: {source} / {media}')
log.info(f'Scheduling task to download metadata for: {media.url}')
verbose_name = _('Downloading metadata for "{}"')
download_media_metadata(
str(media.pk),
priority=20,
verbose_name=verbose_name.format(media.pk),
)
# Tack on a cleanup of old completed tasks
cleanup_completed_tasks()
# Tack on a cleanup of old media
cleanup_old_media()
if source.delete_removed_media:
log.info(f'Cleaning up media no longer in source: {source}')
cleanup_removed_media(source, videos)
with atomic(durable=True):
# Tack on a cleanup of old media
cleanup_old_media()
if source.delete_removed_media:
log.info(f'Cleaning up media no longer in source: {source}')
cleanup_removed_media(source, videos)
@background(schedule=0)