Add more season/episodeNumber support

2026-06-04 19:05:44 +00:00 · 2015-10-25 19:20:49 +01:00
parent 94c670086d
commit 485ac71410
11 changed files with 108 additions and 51 deletions
@@ -5,6 +5,10 @@ Entry point module
 """
 # pragma: no cover
 from __future__ import print_function
+
+from rebulk import debug
+debug.DEBUG = True
+
 from collections import OrderedDict
 import os
 import logging
@@ -6,6 +6,7 @@ Formatters

 from . import seps
 import regex as re
+from rebulk.formatters import formatters

 _excluded_clean_chars = ',:;-/\\'
 clean_chars = ""
@@ -38,6 +39,17 @@ def strip(input_string):
    return input_string.strip(seps)


+def raw_cleanup(raw):
+    """
+    Cleanup a raw value to perform raw comparison
+    :param raw: raw value to normalize; it is lowercased before the formatters are applied
+    :type raw: str (presumably; it must provide .lower())
+    :return: the lowercased value after being passed through the cleanup and strip formatters
+    :rtype:
+    """
+    return formatters(cleanup, strip)(raw.lower())
+
+
 def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Reorder the title
@@ -57,19 +69,3 @@ def reorder_title(title, articles=('the',), separators=(',', ', ')):
            if ltitle[-len(suffix):] == suffix:
                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
    return title
-
-
-def chain(*formatters):
-    """
-    Chain formatter functions
-    :param functions:
-    :type functions:
-    :return:
-    :rtype:
-    """
-    def formatters_chain(input_string):  # pylint:disable=missing-docstring
-        for formatter in formatters:
-            input_string = formatter(input_string)
-        return input_string
-
-    return formatters_chain
@@ -10,7 +10,8 @@ from .common.comparators import marker_sorted

 def prefer_last_path(matches):
    """
-    If multiple match are found, keep the one in the most valuable filepart.
+    If multiple matches are found with the same name, keep the ones in the most valuable filepart.
+    Also keep other matches that have the same value as those in the most valuable filepart.

    :param matches:
    :param context:
@@ -18,17 +19,24 @@ def prefer_last_path(matches):
    """
    filepart = marker_sorted(matches.markers.named('path'), matches)[0]
    for name in matches.names:
-        named_list = matches.named(name)
-        if len(named_list) > 1:
+        name_matches = matches.named(name)
+        if len(name_matches) > 1:
            keep_list = []
-            for named in named_list:
-                marker = matches.markers.at_match(named, lambda marker: marker is filepart, 0)
+            keep_values = []
+            for name_match in name_matches:
+                marker = matches.markers.at_match(name_match, lambda marker: marker is filepart, 0)
                if marker:
-                    keep_list.append(named)
+                    keep_list.append(name_match)
+                    keep_values.append(name_match.value)
+
+            for name_match in name_matches:
+                if name_match not in keep_list and name_match.value in keep_values:
+                    keep_list.append(name_match)
+
            if keep_list:
-                for named in named_list:
-                    if named not in keep_list:
-                        matches.remove(named)
+                for name_match in name_matches:
+                    if name_match not in keep_list:
+                        matches.remove(name_match)


 def enlarge_group_matches(matches):
@@ -53,4 +61,4 @@ def enlarge_group_matches(matches):
            matches.append(match)


-PROCESSORS = Rebulk().processor(prefer_last_path, enlarge_group_matches)
+PROCESSORS = Rebulk().processor(enlarge_group_matches).post_processor(prefer_last_path)
@@ -4,8 +4,9 @@
 Episode title
 """
 from rebulk import Rebulk, AppendMatchRule
+from rebulk.formatters import formatters

-from ..common.formatters import cleanup, reorder_title, chain
+from ..common.formatters import cleanup, reorder_title


 class EpisodeTitleFromPosition(AppendMatchRule):
@@ -19,7 +20,7 @@ class EpisodeTitleFromPosition(AppendMatchRule):
        filename = matches.markers.named('path', -1)
        start, end = filename.span

-        holes = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
+        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              predicate=lambda hole: hole.value)

        for hole in holes:
@@ -9,13 +9,14 @@ from rebulk import Rebulk, RemoveMatchRule
 import regex as re
 from ..common.validators import seps_surround
 from guessit.rules.common import dash
+from ..common.numeral import numeral, parse_numeral

 EPISODES = Rebulk().defaults(validate_all=True, validator={'__parent__': seps_surround})
 EPISODES.regex_defaults(flags=re.IGNORECASE, children=True)

-EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
-               r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)',
-               r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)',
+EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',  # 01x02
+               r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)',  # S01E02, S01x02
+               r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)',  # S01Ex02
               formatter=int,
               private_parent=True,
               tags=['SxxExx'],
@@ -24,20 +25,31 @@ EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
               and other.name == 'screenSize'
               else '__default__')

+season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
+episode_words = ['episode', 'episodes']
+
+EPISODES.regex(r'\L<season_words>-(?P<season>' + numeral + ')', season_words=season_words,  # Season 1, # Season one
+               abbreviations=[dash], formatter=parse_numeral)
+
+season_markers = ['s']
+episode_markers = ['e', 'ep']
+
+
 no_zero_validator = {'__parent__': seps_surround,
                     'season': lambda match: match.value > 0, 'episodeNumber': lambda match: match.value > 0}

-
-EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
-EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
-EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
+EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)  # 12
+EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)  # 02, 012
+EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int,  # 112, 113
               validator=no_zero_validator,
               disabled=lambda context: not context.get('episode_prefer_number', False))

-EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
+EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'],  # 102
+               formatter=int,
               validator=no_zero_validator,
               disabled=lambda context: context.get('episode_prefer_number', False))
-EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
+EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'],  # 0102
+               formatter=int,
               validator=no_zero_validator,
               conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
               disabled=lambda context: context.get('episode_prefer_number', False))
@@ -11,6 +11,7 @@ import regex as re
 from ..common import dash
 from ..common import seps
 from ..common.validators import seps_surround
+from guessit.rules.common.formatters import raw_cleanup

 OTHER = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
 OTHER.defaults(name="other", validator=seps_surround)
@@ -88,9 +89,12 @@ def proper_count(matches):
    """
    propers = matches.named('other', lambda match: match.value == 'Proper')
    if propers:
+        raws = {}  # Count distinct raw values
+        for proper in propers:
+            raws[raw_cleanup(proper.raw)] = proper
        proper_count_match = copy.copy(propers[-1])
        proper_count_match.name = 'properCount'
-        proper_count_match.value = len(propers)
+        proper_count_match.value = len(raws)
        matches.append(proper_count_match)


@@ -4,8 +4,9 @@
 Title
 """
 from rebulk import Rebulk, RemoveMatchRule, AppendRemoveMatchRule
+from rebulk.formatters import formatters

-from ..common.formatters import cleanup, reorder_title, chain
+from ..common.formatters import cleanup, reorder_title
 from ..common.comparators import marker_sorted
 from ..common import seps
 from rebulk.rules import AppendRemoveMatchRule
@@ -31,7 +32,7 @@ class TitleFromPosition(AppendRemoveMatchRule):
        """
        start, end = filepart.span

-        first_hole = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
+        first_hole = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                                   ignore=TitleFromPosition.ignore_language,
                                   predicate=lambda hole: hole.value, index=0)

@@ -127,20 +128,33 @@ class PreferTitleWithYear(RemoveMatchRule):
    priority = -255

    def when(self, matches, context):
-        with_year = []
-        without_year = []
+        to_keep = []
+        to_remove = []

        for title in matches.named('title'):
            filepart = matches.markers.at_match(title, lambda marker: marker.name == 'path', 0)
            if filepart:
                year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
                if year_match:
-                    with_year.append(title)
+                    to_keep.append(title)
                else:
-                    without_year.append(title)
+                    to_remove.append(title)

-        if with_year:
-            return without_year
+        if to_keep:
+            title_values = set([title.value for title in to_keep])
+            if len(title_values) > 1:
+                # We have distinct values for title with year. Keep only values from most valuable filepart.
+                fileparts = marker_sorted(matches.markers.named('path'), matches)
+                best_title = None
+                for filepart in fileparts:
+                    best_title = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title', 0)
+                    if best_title:
+                        break
+                for title in list(to_keep):  # iterate over a copy: to_keep is mutated inside the loop
+                    if title.value != best_title.value:
+                        to_remove.append(title)
+                        to_keep.remove(title)
+            return to_remove


 TITLE = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
@@ -25,3 +25,13 @@
 ? "S03E04 102"
 : season: 3
  episodeNumber: 4
+
+? +serie Saison 2 other
+? +serie Season 2 other
+? +serie Saisons 2 other
+? +serie Seasons 2 other
+? +serie Serie 2 other
+? +serie Series 2 other
+? +serie Season Two other
+? +serie Season II other
+: season: 2
@@ -73,3 +73,9 @@
  season: 4
  episodeNumber: 1
  episodeTitle: Fun Run
+
+? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
+: title: Mad Men
+  season: 1
+  episodeNumber: 1
+  other: Complete
@@ -2,6 +2,8 @@
 # -*- coding: utf-8 -*-
 # pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
 import logging
+logger = logging.getLogger(__name__)
+
 from collections import OrderedDict

 import babelfish
@@ -192,13 +194,13 @@ class TestYml(object):
            if not string_predicate or string_predicate(string):  # pylint: disable=not-callable
                entry = self.check(string, expected)
                if entry.ok:
-                    logging.debug(u'[' + filename + '] ' + six.text_type(entry))
+                    logger.debug(u'[' + filename + '] ' + six.text_type(entry))
                elif entry.warning:
-                    logging.warning(u'[' + filename + '] ' + six.text_type(entry))
+                    logger.warning(u'[' + filename + '] ' + six.text_type(entry))
                elif entry.error:
-                    logging.error(u'[' + filename + '] ' + six.text_type(entry))
+                    logger.error(u'[' + filename + '] ' + six.text_type(entry))
                    for line in entry.details:
-                        logging.error(u'[' + filename + '] ' + ' ' * 4 + line)
+                        logger.error(u'[' + filename + '] ' + ' ' * 4 + line)
                entries.append(entry)
        entries.assert_ok()

@@ -1,2 +1,2 @@
 [pytest]
-addopts = --ignore=setup.py --doctest-modules --doctest-glob='README.rst'
+addopts =-s --ignore=setup.py --doctest-modules --doctest-glob='README.rst'