diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py
index d004362a..7e7a0f0f 100644
--- a/tubesync/sync/tasks.py
+++ b/tubesync/sync/tasks.py
@@ -181,7 +181,6 @@ def cleanup_removed_media(source, videos):
 
 
 @background(schedule=300, remove_existing_tasks=True)
-@atomic(durable=True)
 def index_source_task(source_id):
     '''
         Indexes media available from a Source object.
@@ -206,51 +205,54 @@ def index_source_task(source_id):
     source.save()
     log.info(f'Found {len(videos)} media items for source: {source}')
     fields = lambda f, m: m.get_metadata_field(f)
-    for video in videos:
-        # Create or update each video as a Media object
-        key = video.get(source.key_field, None)
-        if not key:
-            # Video has no unique key (ID), it can't be indexed
-            continue
-        try:
-            media = Media.objects.get(key=key, source=source)
-        except Media.DoesNotExist:
-            media = Media(key=key)
-        media.source = source
-        media.duration = float(video.get(fields('duration', media), None) or 0) or None
-        media.title = str(video.get(fields('title', media), ''))[:200]
-        timestamp = video.get(fields('timestamp', media), None)
-        published_dt = media.metadata_published(timestamp)
-        if published_dt is not None:
-            media.published = published_dt
-        try:
-            with atomic():
-                media.save()
-            log.debug(f'Indexed media: {source} / {media}')
-            # log the new media instances
-            new_media_instance = (
-                media.created and
-                source.last_crawl and
-                media.created >= source.last_crawl
-            )
-            if new_media_instance:
-                log.info(f'Indexed new media: {source} / {media}')
-                log.info(f'Scheduling task to download metadata for: {media.url}')
-                verbose_name = _('Downloading metadata for "{}"')
-                download_media_metadata(
-                    str(media.pk),
-                    priority=9,
-                    verbose_name=verbose_name.format(media.pk),
+    with atomic(durable=True):
+        for video in videos:
+            # Create or update each video as a Media object
+            key = video.get(source.key_field, None)
+            if not key:
+                # Video has no unique key (ID), it can't be indexed
+                continue
+            try:
+                media = Media.objects.get(key=key, source=source)
+            except Media.DoesNotExist:
+                media = Media(key=key)
+            media.source = source
+            media.duration = float(video.get(fields('duration', media), None) or 0) or None
+            media.title = str(video.get(fields('title', media), ''))[:200]
+            timestamp = video.get(fields('timestamp', media), None)
+            published_dt = media.metadata_published(timestamp)
+            if published_dt is not None:
+                media.published = published_dt
+            try:
+                with atomic():
+                    media.save()
+            except IntegrityError as e:
+                log.error(f'Index media failed: {source} / {media} with "{e}"')
+            else:
+                log.debug(f'Indexed media: {source} / {media}')
+                # log the new media instances
+                new_media_instance = (
+                    media.created and
+                    source.last_crawl and
+                    media.created >= source.last_crawl
                 )
-        except IntegrityError as e:
-            log.error(f'Index media failed: {source} / {media} with "{e}"')
+                if new_media_instance:
+                    log.info(f'Indexed new media: {source} / {media}')
+                    log.info(f'Scheduling task to download metadata for: {media.url}')
+                    verbose_name = _('Downloading metadata for "{}"')
+                    download_media_metadata(
+                        str(media.pk),
+                        priority=20,
+                        verbose_name=verbose_name.format(media.pk),
+                    )
     # Tack on a cleanup of old completed tasks
     cleanup_completed_tasks()
-    # Tack on a cleanup of old media
-    cleanup_old_media()
-    if source.delete_removed_media:
-        log.info(f'Cleaning up media no longer in source: {source}')
-        cleanup_removed_media(source, videos)
+    with atomic(durable=True):
+        # Tack on a cleanup of old media
+        cleanup_old_media()
+        if source.delete_removed_media:
+            log.info(f'Cleaning up media no longer in source: {source}')
+            cleanup_removed_media(source, videos)
 
 
 @background(schedule=0)