From 8c22b6c99efb464dfb450707440c6162f04b7b46 Mon Sep 17 00:00:00 2001
From: tcely <tcely@users.noreply.github.com>
Date: Tue, 7 Jan 2025 00:05:57 -0500
Subject: [PATCH] Add response filtering

These functions aren't being used yet; they will be tested against my database before that happens.
---
 tubesync/sync/utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tubesync/sync/utils.py b/tubesync/sync/utils.py
index 3e29fe3f..e44cef1f 100644
--- a/tubesync/sync/utils.py
+++ b/tubesync/sync/utils.py
@@ -170,6 +170,68 @@ def normalize_codec(codec_str):
     return result
 
 
+def _url_keys(arg_dict, filter_func):
+    '''
+        Map each key containing 'url' to (key, filter_func(key=key, url=value)).
+    '''
+    return {
+        name: (name, filter_func(key=name, url=value),)
+        for name, value in arg_dict.items() if 'url' in name
+    }
+
+
+def _drop_url_keys(arg_dict, key, filter_func):
+    '''Delete, in place, the url keys flagged True in each dict of arg_dict[key].'''
+    for val_dict in arg_dict.get(key) or ():
+        verdicts = _url_keys(val_dict, filter_func).values()  # (name, flag) pairs
+        for url_key in [name for name, flag in verdicts if flag is True]:
+            del val_dict[url_key]
+
+
+def filter_response(response_dict):
+    '''
+        Clean up the response so as to not store useless metadata in the database.
+        URLs that expire or are IP-restricted are removed, in place, from the
+        formats, requested_formats and automatic_captions entries.
+        Returns the same dict; raises TypeError when response_dict is not a dict.
+    '''
+    # raise an exception for an unexpected argument type
+    if not isinstance(response_dict, dict):
+        raise TypeError(f'response_dict must be a dict, got "{type(response_dict)}"')
+    # optimize the empty case
+    if not response_dict:
+        return response_dict
+
+    # beginning of formats cleanup {{{
+    # drop urls that expire, or restrict IPs
+    def drop_format_url(**kwargs):
+        url = kwargs['url']
+        # a real bool is returned because the caller tests the result with `is True`
+        return bool(url and '://' in url and ('/ip/' in url or '/expire/' in url))
+
+    _drop_url_keys(response_dict, 'formats', drop_format_url)
+    _drop_url_keys(response_dict, 'requested_formats', drop_format_url)
+    # end of formats cleanup }}}
+
+    # beginning of automatic_captions cleanup {{{
+    # drop urls that expire, or restrict IPs
+    def drop_auto_caption_url(**kwargs):
+        url = kwargs['url']
+        return bool(url and '://' in url and '&expire=' in url)
+
+    # yt-dlp documents this entry as {language: [caption dicts]}, so clean each
+    # language's list; iterating the mapping itself would only yield strings
+    captions = response_dict.get('automatic_captions')
+    if isinstance(captions, dict):
+        for lang_code in captions:
+            _drop_url_keys(captions, lang_code, drop_auto_caption_url)
+    else:
+        _drop_url_keys(response_dict, 'automatic_captions', drop_auto_caption_url)
+    # end of automatic_captions cleanup }}}
+
+    return response_dict
+
+
 def parse_media_format(format_dict):
     '''
         This parser primarily adapts the format dict returned by youtube-dl into a