From d7d673227b40e8e8c0516c6a52f9d42687d98218 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Alvergnat?= Date: Fri, 19 Feb 2021 00:31:08 +0100 Subject: [PATCH] refactor(audio_channels): move more patterns to configuration --- guessit/config/__init__.py | 61 ++++++++++++++++++++++--- guessit/config/options.json | 11 +++-- guessit/rules/properties/audio_codec.py | 23 ++++------ 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/guessit/config/__init__.py b/guessit/config/__init__.py index d945787..c1c71b5 100644 --- a/guessit/config/__init__.py +++ b/guessit/config/__init__.py @@ -1,12 +1,44 @@ """ Config module. """ +from importlib import import_module + from rebulk import Rebulk _regex_prefix = 're:' +_function_prefix = 'fn:' + +_function_cache = {} -def load_config_patterns(rebulk: Rebulk, config: dict): +def _process_option(name, value): + if name == 'validator': + return _process_option_validator(value) + return value + + +def _process_option_validator(value): + if value.startswith(_function_prefix): + function_id = value[len(_function_prefix):] + if function_id in _function_cache: + return _function_cache[function_id] + if '.' in function_id: + module_name, func_name = function_id.rsplit('.', 1) + else: + module_name = "guessit.rules.common.validators" + func_name = function_id + mod = import_module(module_name) + func = getattr(mod, func_name) + _function_cache[function_id] = func + return func + return value + + +def load_config_patterns(rebulk: Rebulk, + config: dict, + pattern_options: dict = None, + regex_options: dict = None, + string_options: dict = None): """ Load patterns defined in given config. :param rebulk: Rebulk builder to use. @@ -16,16 +48,31 @@ def load_config_patterns(rebulk: Rebulk, config: dict): for value, items in config.items(): patterns = items if isinstance(items, list) else [items] for pattern in patterns: + options = dict(pattern_options) if pattern_options else {} if isinstance(pattern, dict): - kwargs = dict(pattern) - pattern = kwargs.pop('pattern') + options.update(pattern) + pattern = options.get('pattern') else: - kwargs = {} - regex = kwargs.pop('regex', False) + options = {} + regex = options.get('regex', False) + if not regex and pattern.startswith(_regex_prefix): regex = True pattern = pattern[len(_regex_prefix):] + + if regex and regex_options: + options.update(regex_options) + elif not regex and string_options: + options.update(string_options) + if isinstance(pattern, dict): + options.update(pattern) + + options.pop('pattern', None) + options.pop('regex', None) + + options = {name: _process_option(name, value) for name, value in options.items()} + if regex: - rebulk.regex(pattern, value=value, **kwargs) + rebulk.regex(pattern, value=value, **options) else: - rebulk.string(pattern, value=value, **kwargs) + rebulk.string(pattern, value=value, **options) diff --git a/guessit/config/options.json b/guessit/config/options.json index 5a343bc..ca3351d 100644 --- a/guessit/config/options.json +++ b/guessit/config/options.json @@ -60,18 +60,21 @@ "2.0": [ "2ch", "stereo", - "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)" + {"pattern": "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true}, + {"pattern": "20", "validator": "fn:seps_after", "tags": "weak-audio_channels"} ], "5.1": [ "5ch", "6ch", - "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", - "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)" + {"pattern": "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true}, + {"pattern": "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true}, + {"pattern": "re:5[01]", "validator": "fn:seps_after", "tags": "weak-audio_channels"} ], "7.1": [ "7ch", "8ch", - "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)" + {"pattern": "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true}, + {"pattern": "re:7[01]", "validator": "fn:seps_after", "tags": "weak-audio_channels"} ] } }, diff --git a/guessit/rules/properties/audio_codec.py b/guessit/rules/properties/audio_codec.py index 0aa7d31..f256884 100644 --- a/guessit/rules/properties/audio_codec.py +++ b/guessit/rules/properties/audio_codec.py @@ -9,6 +9,7 @@ from rebulk.remodule import re from ..common import dash from ..common.pattern import is_disabled from ..common.validators import seps_before, seps_after +from ...config import load_config_patterns audio_properties = ['audio_codec', 'audio_profile', 'audio_channels'] @@ -22,8 +23,8 @@ def audio_codec(config): # pylint:disable=unused-argument :return: Created Rebulk object :rtype: Rebulk """ - rebulk = Rebulk()\ - .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\ + rebulk = Rebulk() \ + .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) \ .string_defaults(ignore_case=True) def audio_codec_priority(match1, match2): @@ -76,16 +77,8 @@ def audio_codec(config): # pylint:disable=unused-argument rebulk.defaults(clear=True, name="audio_channels", disabled=lambda context: is_disabled(context, 'audio_channels')) - rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels') - rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels') - rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels') - for value, items in config.get('audio_channels').items(): - for item in items: - if item.startswith('re:'): - rebulk.regex(item[3:], value=value, children=True) - else: - rebulk.string(item, value=value) + load_config_patterns(rebulk, config.get('audio_channels')) rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule) @@ -139,20 +132,20 @@ class AudioProfileRule(Rule): def when(self, matches, context): profile_list = matches.named('audio_profile', lambda match: 'audio_profile.rule' in match.tags and - self.codec in match.tags) + self.codec in match.tags) ret = [] for profile in profile_list: codec = matches.at_span(profile.span, lambda match: match.name == 'audio_codec' and - match.value == self.codec, 0) + match.value == self.codec, 0) if not codec: codec = matches.previous(profile, lambda match: match.name == 'audio_codec' and - match.value == self.codec) + match.value == self.codec) if not codec: codec = matches.next(profile, lambda match: match.name == 'audio_codec' and - match.value == self.codec) + match.value == self.codec) if not codec: ret.append(profile) if codec: