diff --git a/guessit/options.py b/guessit/options.py index 63528b4..adf3bbc 100644 --- a/guessit/options.py +++ b/guessit/options.py @@ -30,6 +30,10 @@ def build_argument_parser(): help='If short date is found, consider the second digits as the day.') naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages', help='Allowed language (can be used multiple times)') + naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number', + default=False, + help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, ' + 'it will be guessed as season 2, episodeNumber 13') output_opts = opts.add_argument_group("Output") output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False, @@ -47,6 +51,8 @@ def build_argument_parser(): output_opts.add_argument('-f', '--input-file', dest='input_file', default=False, help='Read filenames from an input file.') + + information_opts = opts.add_argument_group("Information") information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False, help='Display properties that can be guessed.') diff --git a/guessit/rules/common/__init__.py b/guessit/rules/common/__init__.py index 6853429..a9cf6a5 100644 --- a/guessit/rules/common/__init__.py +++ b/guessit/rules/common/__init__.py @@ -5,6 +5,6 @@ Common module """ import six -seps = six.u(r' [](){}!?+*|&=%§-_~#/\.,;') # list of tags/words separators +seps = six.u(r' [](){}!?+*|&=§-_~#/\.,;') # list of tags/words separators dash = (six.u(r'-'), six.u(r'[\W_]?')) # abbreviation used by many rebulk objects. 
diff --git a/guessit/rules/properties/bonus.py b/guessit/rules/properties/bonus.py index 52b5820..ae67750 100644 --- a/guessit/rules/properties/bonus.py +++ b/guessit/rules/properties/bonus.py @@ -11,10 +11,12 @@ from ..common.validators import seps_surround BONUS = Rebulk().regex_defaults(flags=re.IGNORECASE) + BONUS.regex(r'x(\d+)', name='bonusNumber', private_parent=True, children=True, formatter=int, validator={'__parent__': lambda match: seps_surround}, conflict_solver=lambda match, conflicting: match - if conflicting.name in ['videoCodec', 'episodeNumber'] else '__default__') + if conflicting.name in ['videoCodec', 'episodeNumber'] and 'bonus-conflict' not in conflicting.tags + else '__default__') class BonusTitleRule(AppendMatchRule): diff --git a/guessit/rules/properties/crc.py b/guessit/rules/properties/crc.py index f42290b..a754d40 100644 --- a/guessit/rules/properties/crc.py +++ b/guessit/rules/properties/crc.py @@ -28,8 +28,10 @@ def guess_idnumber(string): :rtype: """ #pylint:disable=invalid-name - match = _idnum.search(string) - if match is not None: + ret = [] + + matches = list(_idnum.finditer(string)) + for match in matches: result = match.groupdict() switch_count = 0 switch_letter_count = 0 @@ -60,7 +62,9 @@ def guess_idnumber(string): letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1 if switch_ratio > 0.4 and letters_ratio > 0.4: - return match.span() + ret.append(match.span()) + + return ret CRC.functional(guess_idnumber, name='idNumber') diff --git a/guessit/rules/properties/episode_title.py b/guessit/rules/properties/episode_title.py index 640210d..9d9cf6a 100644 --- a/guessit/rules/properties/episode_title.py +++ b/guessit/rules/properties/episode_title.py @@ -13,19 +13,21 @@ class EpisodeTitleFromPosition(AppendMatchRule): Add episode title match in existing matches Must run after TitleFromPosition rule. 
""" - priority = 10 + priority = 9 # Just after main title def when(self, matches, context): filename = matches.markers.named('path', -1) start, end = filename.span - second_hole = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title), - predicate=lambda hole: hole.value, index=1) - if second_hole: - episode = matches.previous(second_hole, lambda previous: previous.name in ['episodeNumber', 'season'], 0) + holes = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title), + predicate=lambda hole: hole.value) + + for hole in holes: + episode = matches.previous(hole, + lambda previous: previous.name in ['episodeNumber', 'season', 'date'], 0) if episode: group_markers = matches.markers.named('group') - title = second_hole.crop(group_markers, index=0) + title = hole.crop(group_markers, index=0) if title and title.value: title.name = 'episodeTitle' diff --git a/guessit/rules/properties/episodes.py b/guessit/rules/properties/episodes.py index 7b15d49..42623af 100644 --- a/guessit/rules/properties/episodes.py +++ b/guessit/rules/properties/episodes.py @@ -4,18 +4,70 @@ Season/Episode numbering support """ -from rebulk import Rebulk +from rebulk import Rebulk, RemoveMatchRule import regex as re +from ..common.validators import seps_surround +from guessit.rules.common import dash -EPISODES = Rebulk().regex_defaults(flags=re.IGNORECASE) +EPISODES = Rebulk().defaults(validate_all=True, validator={'__parent__': seps_surround}) +EPISODES.regex_defaults(flags=re.IGNORECASE, children=True) EPISODES.regex(r'(?P\d+)x(?P\d+)', r'S(?P\d+)[ex](?P\d+)', + r'S(?P\d+)xe(?P\d+)', formatter=int, - children=True, private_parent=True, + tags=['SxxExx'], conflict_solver=lambda match, other: match if match.name in ['season', 'episodeNumber'] and other.name == 'screenSize' else '__default__') + +no_zero_validator = {'__parent__': seps_surround, + 'season': lambda match: match.value > 0, 'episodeNumber': lambda match: match.value > 0} + + +EPISODES.regex(r'(?P\d{2})', 
tags=['bonus-conflict', 'weak-movie'], formatter=int) +EPISODES.regex(r'0(?P\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int) +EPISODES.regex(r'(?P\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int, + validator=no_zero_validator, + disabled=lambda context: not context.get('episode_prefer_number', False)) + +EPISODES.regex(r'(?P\d{1})(?P\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int, + validator=no_zero_validator, + disabled=lambda context: context.get('episode_prefer_number', False)) +EPISODES.regex(r'(?P\d{2})(?P\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int, + validator=no_zero_validator, + conflict_solver=lambda match, other: match if other.name == 'year' else '__default__', + disabled=lambda context: context.get('episode_prefer_number', False)) + +# Hardcoded movie to disable weak season/episodes +EPISODES.regex('OSS-117', + abbreviations=[dash], name="hardcoded-movies", marker=True, + conflict_solver=lambda match, other: None, children=False) + + +class RemoveWeakIfMovie(RemoveMatchRule): + """ + Remove weak-movie tagged matches if it seems to be a movie. + """ + priority = 550 + + def when(self, matches, context): + if matches.named('year') or matches.markers.named('hardcoded-movies'): + return matches.tagged('weak-movie') + + +class RemoveWeakIfSxxExx(RemoveMatchRule): + """ + Remove weak-movie tagged matches if SxxExx pattern is matched. + """ + priority = 550 + + def when(self, matches, context): + if matches.tagged('SxxExx'): + return matches.tagged('weak-movie') + +EPISODES.rules(RemoveWeakIfMovie, RemoveWeakIfSxxExx) + diff --git a/guessit/test/rules/episodes.yml b/guessit/test/rules/episodes.yml index 1b9426d..80cf45f 100644 --- a/guessit/test/rules/episodes.yml +++ b/guessit/test/rules/episodes.yml @@ -14,4 +14,14 @@ ? -3x05 ? -2x06 : season: 2 - episodeNumber: 5 \ No newline at end of file + episodeNumber: 5 + +? "+0102" +? "+102" +: season: 1 + episodeNumber: 2 + +? "0102 S03E04" +? 
"S03E04 102" +: season: 3 + episodeNumber: 4 diff --git a/guessit/test/series.yml b/guessit/test/series.yml index f151b0a..21ed839 100644 --- a/guessit/test/series.yml +++ b/guessit/test/series.yml @@ -42,3 +42,34 @@ episodeNumber: 8 episodeTitle: A Bas Le Sergent Skinner language: French + +? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi +: title: Duckman + season: 1 + episodeNumber: 1 + episodeTitle: I, Duckman + date: 2002-11-07 + +? Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.avi +: title: The Simpsons + season: 12 + episodeNumber: 8 + episodeTitle: A Bas Le Sergent Skinner + language: French + +? Series/Futurama/Season 3 (mkv)/[™] Futurama - S03E22 - Le chef de fer à 30% ( 30 Percent Iron Chef ).mkv +: title: Futurama + season: 3 + episodeNumber: 22 + episodeTitle: Le chef de fer à 30% + +? Series/The Office/Season 6/The Office - S06xE01.avi +: title: The Office + season: 6 + episodeNumber: 1 + +? series/The Office/Season 4/The Office [401] Fun Run.avi +: title: The Office + season: 4 + episodeNumber: 1 + episodeTitle: Fun Run