Add option --parse-metadata

* The fields extracted by this can be used in `--output`
* Deprecated `--metadata-from-title`

:ci skip dl
2025-09-28 18:04:49 +00:00 · 2021-01-26 15:50:20 +05:30
parent 9882064024
commit 5bfa486205
8 changed files with 162 additions and 110 deletions
--- a/youtube_dlc/postprocessor/__init__.py
+++ b/youtube_dlc/postprocessor/__init__.py
@@ -16,7 +16,8 @@ from .ffmpeg import (
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
-from .metadatafromtitle import MetadataFromTitlePP
+from .metadatafromfield import MetadataFromFieldPP
+from .metadatafromfield import MetadataFromTitlePP
 from .movefilesafterdownload import MoveFilesAfterDownloadPP
 from .sponskrub import SponSkrubPP

@@ -39,6 +40,7 @@ __all__ = [
    'FFmpegSubtitlesConvertorPP',
    'FFmpegVideoConvertorPP',
    'FFmpegVideoRemuxerPP',
+    'MetadataFromFieldPP',
    'MetadataFromTitlePP',
    'MoveFilesAfterDownloadPP',
    'SponSkrubPP',
--- a/youtube_dlc/postprocessor/metadatafromfield.py
+++ b/youtube_dlc/postprocessor/metadatafromfield.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..compat import compat_str
+
+
+class MetadataFromFieldPP(PostProcessor):
+    regex = r'(?P<field>\w+):(?P<format>.+)$'  # each entry is "FIELD:FORMAT"
+
+    def __init__(self, downloader, formats):
+        PostProcessor.__init__(self, downloader)
+        assert isinstance(formats, (list, tuple))
+        self._data = []  # one {'field', 'format', 'regex'} dict per entry
+        for f in formats:
+            assert isinstance(f, compat_str)
+            match = re.match(self.regex, f)
+            assert match is not None
+            self._data.append({
+                'field': match.group('field'),
+                'format': match.group('format'),
+                'regex': self.format_to_regex(match.group('format'))})
+
+    def format_to_regex(self, fmt):
+        r"""
+        Converts a string like '%(title)s - %(artist)s' to a regex like
+           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'
+        Text around the %(..)s fields is escaped and matched literally;
+        a format without any %(..)s field is returned unchanged.
+        """
+        if not re.search(r'%\(\w+\)s', fmt):
+            return fmt  # no %(..)s field: the format is used as a regex as-is
+        lastpos = 0
+        regex = ''
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            regex += re.escape(fmt[lastpos:match.start()])
+            regex += r'(?P<' + match.group(1) + r'>[^\r\n]+)'
+            lastpos = match.end()
+        if lastpos < len(fmt):
+            regex += re.escape(fmt[lastpos:])
+        return regex
+
+    def run(self, info):
+        for dictn in self._data:
+            field, regex = dictn['field'], dictn['regex']
+            if info.get(field) is None:  # missing or None: re.search would raise
+                self.report_warning('Video does not have a %s' % field)
+                continue
+            self.write_debug('Searching for r"%s" in %s' % (regex, field))
+            match = re.search(regex, info[field])  # may match anywhere in the field
+            if match is None:
+                self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
+                continue
+            for attribute, value in match.groupdict().items():
+                info[attribute] = value  # named groups overwrite existing keys
+                self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
+        return [], info
+
+
+class MetadataFromTitlePP(MetadataFromFieldPP):  # kept for backward compatibility
+    def __init__(self, downloader, titleformat):
+        title_spec = 'title:%s' % titleformat  # parent takes "FIELD:FORMAT" entries
+        MetadataFromFieldPP.__init__(self, downloader, [title_spec])
+        self._titleformat, self._titleregex = titleformat, self._data[0]['regex']
--- a/youtube_dlc/postprocessor/metadatafromtitle.py
+++ b/youtube_dlc/postprocessor/metadatafromtitle.py
@@ -1,44 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import PostProcessor
-
-
-class MetadataFromTitlePP(PostProcessor):
-    def __init__(self, downloader, titleformat):
-        super(MetadataFromTitlePP, self).__init__(downloader)
-        self._titleformat = titleformat
-        self._titleregex = (self.format_to_regex(titleformat)
-                            if re.search(r'%\(\w+\)s', titleformat)
-                            else titleformat)
-
-    def format_to_regex(self, fmt):
-        r"""
-        Converts a string like
-           '%(title)s - %(artist)s'
-        to a regex like
-           '(?P<title>.+)\ \-\ (?P<artist>.+)'
-        """
-        lastpos = 0
-        regex = ''
-        # replace %(..)s with regex group and escape other string parts
-        for match in re.finditer(r'%\((\w+)\)s', fmt):
-            regex += re.escape(fmt[lastpos:match.start()])
-            regex += r'(?P<' + match.group(1) + '>.+)'
-            lastpos = match.end()
-        if lastpos < len(fmt):
-            regex += re.escape(fmt[lastpos:])
-        return regex
-
-    def run(self, info):
-        title = info['title']
-        match = re.match(self._titleregex, title)
-        if match is None:
-            self.to_screen('Could not interpret title of video as "%s"' % self._titleformat)
-            return [], info
-        for attribute, value in match.groupdict().items():
-            info[attribute] = value
-            self.to_screen('parsed %s: %s' % (attribute, value if value is not None else 'NA'))
-
-        return [], info