From 3f10e45e6a1fa74380832d28f22a781616655118 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 10 Mar 2025 20:26:47 -0400 Subject: [PATCH 1/8] Add timing and profiling decorators --- tubesync/common/utils.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index 95efd9f3..a292ddc5 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -1,7 +1,11 @@ +import cProfile +import emoji +import io +import pstats import string +import time from datetime import datetime from urllib.parse import urlunsplit, urlencode, urlparse -import emoji from yt_dlp.utils import LazyList from .errors import DatabaseConnectionError @@ -136,3 +140,29 @@ def json_serial(obj): if isinstance(obj, LazyList): return list(obj) raise TypeError(f'Type {type(obj)} is not json_serial()-able') + + +def time_func(func): + def wrapper(*args, **kwargs): + start = time.perf_counter() + result = func(*args, **kwargs) + end = time.perf_counter() + elapsed = end - start + return (result, (start - end, start, end,),) + return wrapper + + +def profile_func(func): + def wrapper(*args, **kwargs): + s = io.StringIO() + with cProfile.Profile() as pr: + pr.enable() + result = func(*args, **kwargs) + pr.disable() + ps = pstats.Stats(pr, stream=s) + ps.sort_stats( + pstats.SortKey.CUMULATIVE + ).print_stats() + return (result, (s.getvalue(), ps),) + return wrapper + From 5f11779dc2c55ee7e6f673cef8feb0f264679912 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 05:17:46 -0400 Subject: [PATCH 2/8] Time some index functions --- tubesync/sync/tasks.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index fdc954a3..58768b04 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -183,16 +183,38 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' + + from common.utils import time_func, profile_func + def get_source(source_id): + @time_func + def f(sid): + return Source.objects.get(pk=sid) + rt = f(source_id) + elapsed = rt[1][0] + log.debug(f'get_source: took {elapsed:.6f} seconds') + return rt[0] + def time_model_function(instance, func): + @time_func + def f(o, c): + return o.c() + rt = f(instance, func) + elapsed = rt[1][0] + log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') + return rt[0] + try: - source = Source.objects.get(pk=source_id) + #source = Source.objects.get(pk=source_id) + source = get_source(source_id) except Source.DoesNotExist: # Task triggered but the Source has been deleted, delete the task return # Reset any errors source.has_failed = False - source.save() + #source.save() + time_model_function(source, source.save) # Index the source - videos = source.index_media() + #videos = source.index_media() + videos = time_model_function(source, source.index_media) if not videos: raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? Check the ' @@ -200,7 +222,8 @@ def index_source_task(source_id): f'is reachable') # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() - source.save() + #source.save() + time_model_function(source, source.save) log.info(f'Found {len(videos)} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) for video in videos: @@ -221,7 +244,8 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - media.save() + #media.save() + time_model_function(media, media.save) log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( From d69866ded8c391715ea5a315a44aa7109f21d67b Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 05:45:00 -0400 Subject: [PATCH 3/8] fixup: call function with instance --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 58768b04..bfec2d2e 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -196,7 +196,7 @@ def index_source_task(source_id): def time_model_function(instance, func): @time_func def f(o, c): - return o.c() + return c(o) rt = f(instance, func) elapsed = rt[1][0] log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') From 76c60830eb093822344611635c4c4736b811bceb Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 06:54:33 -0400 Subject: [PATCH 4/8] Positive timing values --- tubesync/common/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tubesync/common/utils.py b/tubesync/common/utils.py index a292ddc5..7bf89041 100644 --- a/tubesync/common/utils.py +++ b/tubesync/common/utils.py @@ -147,8 +147,7 @@ def time_func(func): start = time.perf_counter() result = func(*args, **kwargs) end = time.perf_counter() - elapsed = end - start - return (result, (start - end, start, end,),) + return (result, (end - start, start, end,),) return wrapper @@ -163,6 +162,6 @@ def profile_func(func): ps.sort_stats( pstats.SortKey.CUMULATIVE ).print_stats() - return (result, (s.getvalue(), ps),) + return (result, (s.getvalue(), ps, s),) return wrapper From 08fb55eb64c0db5de459fe03f6b7d39517f64905 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 06:59:40 -0400 Subject: [PATCH 5/8] fixup: we don't need to assign self --- tubesync/sync/tasks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index bfec2d2e..10c91ca3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -195,9 +195,9 @@ def index_source_task(source_id): return rt[0] def time_model_function(instance, func): @time_func - def f(o, c): - return c(o) - rt = f(instance, func) + def f(c): + return c() + rt = f(func) elapsed = rt[1][0] log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') return rt[0] From cfb117b555cf42b1f9f79f19f2a6d863bf849426 Mon Sep 17 00:00:00 2001 From: tcely Date: Tue, 11 Mar 2025 21:10:42 -0400 Subject: [PATCH 6/8] Tweaked logged message --- tubesync/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10c91ca3..9760b6cf 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -199,7 +199,7 @@ def index_source_task(source_id): return c() rt = f(func) elapsed = rt[1][0] - log.debug(f'time_model_function: {instance}: {func}: took {elapsed:.6f} seconds') + log.debug(f'time_model_function: {func}: took {elapsed:.6f} seconds') return rt[0] try: From 8314019781f6af02d11fa48b5837c9be15e32d65 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 14:59:58 -0400 Subject: [PATCH 7/8] Remove timing measurement from tasks.py --- tubesync/sync/tasks.py | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 9760b6cf..10ab6905 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -184,37 +184,16 @@ def index_source_task(source_id): Indexes media available from a Source object. ''' - from common.utils import time_func, profile_func - def get_source(source_id): - @time_func - def f(sid): - return Source.objects.get(pk=sid) - rt = f(source_id) - elapsed = rt[1][0] - log.debug(f'get_source: took {elapsed:.6f} seconds') - return rt[0] - def time_model_function(instance, func): - @time_func - def f(c): - return c() - rt = f(func) - elapsed = rt[1][0] - log.debug(f'time_model_function: {func}: took {elapsed:.6f} seconds') - return rt[0] - try: - #source = Source.objects.get(pk=source_id) - source = get_source(source_id) + source = Source.objects.get(pk=source_id) except Source.DoesNotExist: # Task triggered but the Source has been deleted, delete the task return # Reset any errors source.has_failed = False - #source.save() - time_model_function(source, source.save) + source.save() # Index the source - #videos = source.index_media() - videos = time_model_function(source, source.index_media) + videos = source.index_media() if not videos: raise NoMediaException(f'Source "{source}" (ID: {source_id}) returned no ' f'media to index, is the source key valid? Check the ' @@ -222,8 +201,7 @@ def index_source_task(source_id): f'is reachable') # Got some media, update the last crawl timestamp source.last_crawl = timezone.now() - #source.save() - time_model_function(source, source.save) + source.save() log.info(f'Found {len(videos)} media items for source: {source}') fields = lambda f, m: m.get_metadata_field(f) for video in videos: @@ -244,8 +222,7 @@ def index_source_task(source_id): if published_dt is not None: media.published = published_dt try: - #media.save() - time_model_function(media, media.save) + media.save() log.debug(f'Indexed media: {source} / {media}') # log the new media instances new_media_instance = ( From 393330a99fb6c351338f30d6e921212b143bfe68 Mon Sep 17 00:00:00 2001 From: tcely Date: Fri, 14 Mar 2025 15:03:22 -0400 Subject: [PATCH 8/8] Remove an extra blank line --- tubesync/sync/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 10ab6905..fdc954a3 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -183,7 +183,6 @@ def index_source_task(source_id): ''' Indexes media available from a Source object. ''' - try: source = Source.objects.get(pk=source_id) except Source.DoesNotExist: