Add weak episodeNumber/season patterns

This commit is contained in:
Toilal
2015-10-24 22:41:05 +02:00
parent 49f797158c
commit 94c670086d
8 changed files with 122 additions and 15 deletions
+6
View File
@@ -30,6 +30,10 @@ def build_argument_parser():
help='If short date is found, consider the second digits as the day.')
naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages',
help='Allowed language (can be used multiple times)')
naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number',
default=False,
help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
'it will be guessed as season 2, episodeNumber 13')
output_opts = opts.add_argument_group("Output")
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
@@ -47,6 +51,8 @@ def build_argument_parser():
output_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
help='Read filenames from an input file.')
information_opts = opts.add_argument_group("Information")
information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=False,
help='Display properties that can be guessed.')
+1 -1
View File
@@ -5,6 +5,6 @@ Common module
"""
import six
seps = six.u(r' [](){}!?+*|&=%§-_~#/\.,;') # list of tags/words separators
seps = six.u(r' [](){}!?+*|&=§-_~#/\.,;') # list of tags/words separators
dash = (six.u(r'-'), six.u(r'[\W_]?')) # abbreviation used by many rebulk objects.
+3 -1
View File
@@ -11,10 +11,12 @@ from ..common.validators import seps_surround
BONUS = Rebulk().regex_defaults(flags=re.IGNORECASE)
BONUS.regex(r'x(\d+)', name='bonusNumber', private_parent=True, children=True, formatter=int,
validator={'__parent__': lambda match: seps_surround},
conflict_solver=lambda match, conflicting: match
if conflicting.name in ['videoCodec', 'episodeNumber'] else '__default__')
if conflicting.name in ['videoCodec', 'episodeNumber'] and 'bonus-conflict' not in conflicting.tags
else '__default__')
class BonusTitleRule(AppendMatchRule):
+7 -3
View File
@@ -28,8 +28,10 @@ def guess_idnumber(string):
:rtype:
"""
#pylint:disable=invalid-name
match = _idnum.search(string)
if match is not None:
ret = []
matches = list(_idnum.finditer(string))
for match in matches:
result = match.groupdict()
switch_count = 0
switch_letter_count = 0
@@ -60,7 +62,9 @@ def guess_idnumber(string):
letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1
if switch_ratio > 0.4 and letters_ratio > 0.4:
return match.span()
ret.append(match.span())
return ret
CRC.functional(guess_idnumber, name='idNumber')
+8 -6
View File
@@ -13,19 +13,21 @@ class EpisodeTitleFromPosition(AppendMatchRule):
Add episode title match in existing matches
Must run after TitleFromPosition rule.
"""
priority = 10
priority = 9 # Just after main title
def when(self, matches, context):
filename = matches.markers.named('path', -1)
start, end = filename.span
second_hole = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
predicate=lambda hole: hole.value, index=1)
if second_hole:
episode = matches.previous(second_hole, lambda previous: previous.name in ['episodeNumber', 'season'], 0)
holes = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
predicate=lambda hole: hole.value)
for hole in holes:
episode = matches.previous(hole,
lambda previous: previous.name in ['episodeNumber', 'season', 'date'], 0)
if episode:
group_markers = matches.markers.named('group')
title = second_hole.crop(group_markers, index=0)
title = hole.crop(group_markers, index=0)
if title and title.value:
title.name = 'episodeTitle'
+55 -3
View File
@@ -4,18 +4,70 @@
Season/Episode numbering support
"""
from rebulk import Rebulk
from rebulk import Rebulk, RemoveMatchRule
import regex as re
from ..common.validators import seps_surround
from guessit.rules.common import dash
EPISODES = Rebulk().regex_defaults(flags=re.IGNORECASE)
EPISODES = Rebulk().defaults(validate_all=True, validator={'__parent__': seps_surround})
EPISODES.regex_defaults(flags=re.IGNORECASE, children=True)
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)',
r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)',
formatter=int,
children=True,
private_parent=True,
tags=['SxxExx'],
conflict_solver=lambda match, other: match
if match.name in ['season', 'episodeNumber']
and other.name == 'screenSize'
else '__default__')
no_zero_validator = {'__parent__': seps_surround,
'season': lambda match: match.value > 0, 'episodeNumber': lambda match: match.value > 0}
EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
validator=no_zero_validator,
disabled=lambda context: not context.get('episode_prefer_number', False))
EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
validator=no_zero_validator,
disabled=lambda context: context.get('episode_prefer_number', False))
EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
validator=no_zero_validator,
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
disabled=lambda context: context.get('episode_prefer_number', False))
# Hardcoded movie to disable weak season/episodes
EPISODES.regex('OSS-117',
abbreviations=[dash], name="hardcoded-movies", marker=True,
conflict_solver=lambda match, other: None, children=False)
class RemoveWeakIfMovie(RemoveMatchRule):
    """
    Drop the weak season/episode guesses when the input looks like a movie.

    The input is treated as a movie when a 'year' match exists or when a
    'hardcoded-movies' marker has been set.
    """
    priority = 550

    def when(self, matches, context):
        """
        Select the matches tagged 'weak-movie' for removal.

        :param matches: current matches; its markers are also consulted
        :param context: unused by this rule
        :return: the matches tagged 'weak-movie' if a 'year' match or a
            'hardcoded-movies' marker is present, otherwise None
        """
        # The marker lookup is only evaluated when there is no 'year' match.
        movie_evidence = matches.named('year') or matches.markers.named('hardcoded-movies')
        return matches.tagged('weak-movie') if movie_evidence else None
class RemoveWeakIfSxxExx(RemoveMatchRule):
    """
    Drop the weak season/episode guesses when an explicit SxxExx-style
    pattern has been matched.
    """
    priority = 550

    def when(self, matches, context):
        """
        Select the matches tagged 'weak-movie' for removal.

        :param matches: current matches
        :param context: unused by this rule
        :return: the matches tagged 'weak-movie' if at least one match is
            tagged 'SxxExx', otherwise None
        """
        if not matches.tagged('SxxExx'):
            return None
        return matches.tagged('weak-movie')
EPISODES.rules(RemoveWeakIfMovie, RemoveWeakIfSxxExx)
+11 -1
View File
@@ -14,4 +14,14 @@
? -3x05
? -2x06
: season: 2
episodeNumber: 5
episodeNumber: 5
? "+0102"
? "+102"
: season: 1
episodeNumber: 2
? "0102 S03E04"
? "S03E04 102"
: season: 3
episodeNumber: 4
+31
View File
@@ -42,3 +42,34 @@
episodeNumber: 8
episodeTitle: A Bas Le Sergent Skinner
language: French
? Series/Duckman/Duckman - 101 (01) - 20021107 - I, Duckman.avi
: title: Duckman
season: 1
episodeNumber: 1
episodeTitle: I, Duckman
date: 2002-11-07
? Series/Simpsons/Saison 12 Français/Simpsons,.The.12x08.A.Bas.Le.Sergent.Skinner.FR.avi
: title: The Simpsons
season: 12
episodeNumber: 8
episodeTitle: A Bas Le Sergent Skinner
language: French
? Series/Futurama/Season 3 (mkv)/[™] Futurama - S03E22 - Le chef de fer à 30% ( 30 Percent Iron Chef ).mkv
: title: Futurama
season: 3
episodeNumber: 22
episodeTitle: Le chef de fer à 30%
? Series/The Office/Season 6/The Office - S06xE01.avi
: title: The Office
season: 6
episodeNumber: 1
? series/The Office/Season 4/The Office [401] Fun Run.avi
: title: The Office
season: 4
episodeNumber: 1
episodeTitle: Fun Run