Add more season/episodeNumber support

This commit is contained in:
Toilal
2015-10-25 19:20:49 +01:00
parent 94c670086d
commit 485ac71410
11 changed files with 108 additions and 51 deletions
+4
View File
@@ -5,6 +5,10 @@ Entry point module
"""
# pragma: no cover
from __future__ import print_function
from rebulk import debug
debug.DEBUG = True
from collections import OrderedDict
import os
import logging
+12 -16
View File
@@ -6,6 +6,7 @@ Formatters
from . import seps
import regex as re
from rebulk.formatters import formatters
_excluded_clean_chars = ',:;-/\\'
clean_chars = ""
@@ -38,6 +39,17 @@ def strip(input_string):
return input_string.strip(seps)
def raw_cleanup(raw):
    """
    Normalize a raw value so that it can be compared with other raw values.

    The value is lower-cased, then handed to the formatter built by
    ``formatters(cleanup, strip)``.

    :param raw: raw value to normalize (must provide ``lower()``; presumably a string)
    :type raw: str
    :return: whatever the combined ``cleanup``/``strip`` formatter returns for the lower-cased value
    :rtype: str (presumably -- depends on the formatters, confirm)
    """
    normalize = formatters(cleanup, strip)
    lowered = raw.lower()
    return normalize(lowered)
def reorder_title(title, articles=('the',), separators=(',', ', ')):
"""
Reorder the title
@@ -57,19 +69,3 @@ def reorder_title(title, articles=('the',), separators=(',', ', ')):
if ltitle[-len(suffix):] == suffix:
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
return title
def chain(*formatters):
    """
    Build a single formatter that applies the given formatters one after another.

    :param formatters: formatter callables, applied in the order they are given
    :type formatters: callable
    :return: function feeding its input through each formatter in turn and returning the
        final result (the input itself when no formatter was given)
    :rtype: callable
    """
    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        current = input_string
        for apply_formatter in formatters:
            # Each formatter receives the output of the previous one.
            current = apply_formatter(current)
        return current

    return formatters_chain
+18 -10
View File
@@ -10,7 +10,8 @@ from .common.comparators import marker_sorted
def prefer_last_path(matches):
"""
If multiple match are found, keep the one in the most valuable filepart.
If multiple matches are found with the same name, keep the ones in the most valuable filepart.
Also keep other matches with the same value as those in the most valuable filepart.
:param matches:
:param context:
@@ -18,17 +19,24 @@ def prefer_last_path(matches):
"""
filepart = marker_sorted(matches.markers.named('path'), matches)[0]
for name in matches.names:
named_list = matches.named(name)
if len(named_list) > 1:
name_matches = matches.named(name)
if len(name_matches) > 1:
keep_list = []
for named in named_list:
marker = matches.markers.at_match(named, lambda marker: marker is filepart, 0)
keep_values = []
for name_match in name_matches:
marker = matches.markers.at_match(name_match, lambda marker: marker is filepart, 0)
if marker:
keep_list.append(named)
keep_list.append(name_match)
keep_values.append(name_match.value)
for name_match in name_matches:
if name_match not in keep_list and name_match.value in keep_values:
keep_list.append(name_match)
if keep_list:
for named in named_list:
if named not in keep_list:
matches.remove(named)
for name_match in name_matches:
if name_match not in keep_list:
matches.remove(name_match)
def enlarge_group_matches(matches):
@@ -53,4 +61,4 @@ def enlarge_group_matches(matches):
matches.append(match)
PROCESSORS = Rebulk().processor(prefer_last_path, enlarge_group_matches)
PROCESSORS = Rebulk().processor(enlarge_group_matches).post_processor(prefer_last_path)
+3 -2
View File
@@ -4,8 +4,9 @@
Episode title
"""
from rebulk import Rebulk, AppendMatchRule
from rebulk.formatters import formatters
from ..common.formatters import cleanup, reorder_title, chain
from ..common.formatters import cleanup, reorder_title
class EpisodeTitleFromPosition(AppendMatchRule):
@@ -19,7 +20,7 @@ class EpisodeTitleFromPosition(AppendMatchRule):
filename = matches.markers.named('path', -1)
start, end = filename.span
holes = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
predicate=lambda hole: hole.value)
for hole in holes:
+21 -9
View File
@@ -9,13 +9,14 @@ from rebulk import Rebulk, RemoveMatchRule
import regex as re
from ..common.validators import seps_surround
from guessit.rules.common import dash
from ..common.numeral import numeral, parse_numeral
EPISODES = Rebulk().defaults(validate_all=True, validator={'__parent__': seps_surround})
EPISODES.regex_defaults(flags=re.IGNORECASE, children=True)
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)',
r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)',
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)', # 01x02
r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)', # S01E02, S01x02
r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)', # S01Ex02
formatter=int,
private_parent=True,
tags=['SxxExx'],
@@ -24,20 +25,31 @@ EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
and other.name == 'screenSize'
else '__default__')
season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
episode_words = ['episode', 'episodes']
EPISODES.regex(r'\L<season_words>-(?P<season>' + numeral + ')', season_words=season_words, # Season 1, # Season one
abbreviations=[dash], formatter=parse_numeral)
season_markers = ['s']
episode_markers = ['e', 'ep']
no_zero_validator = {'__parent__': seps_surround,
'season': lambda match: match.value > 0, 'episodeNumber': lambda match: match.value > 0}
EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int) # 12
EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int) # 02, 012
EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int, # 112, 113
validator=no_zero_validator,
disabled=lambda context: not context.get('episode_prefer_number', False))
EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], # 102
formatter=int,
validator=no_zero_validator,
disabled=lambda context: context.get('episode_prefer_number', False))
EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], # 0102
formatter=int,
validator=no_zero_validator,
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
disabled=lambda context: context.get('episode_prefer_number', False))
+5 -1
View File
@@ -11,6 +11,7 @@ import regex as re
from ..common import dash
from ..common import seps
from ..common.validators import seps_surround
from guessit.rules.common.formatters import raw_cleanup
OTHER = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
OTHER.defaults(name="other", validator=seps_surround)
@@ -88,9 +89,12 @@ def proper_count(matches):
"""
propers = matches.named('other', lambda match: match.value == 'Proper')
if propers:
raws = {} # Count distinct raw values
for proper in propers:
raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'properCount'
proper_count_match.value = len(propers)
proper_count_match.value = len(raws)
matches.append(proper_count_match)
+22 -8
View File
@@ -4,8 +4,9 @@
Title
"""
from rebulk import Rebulk, RemoveMatchRule, AppendRemoveMatchRule
from rebulk.formatters import formatters
from ..common.formatters import cleanup, reorder_title, chain
from ..common.formatters import cleanup, reorder_title
from ..common.comparators import marker_sorted
from ..common import seps
from rebulk.rules import AppendRemoveMatchRule
@@ -31,7 +32,7 @@ class TitleFromPosition(AppendRemoveMatchRule):
"""
start, end = filepart.span
first_hole = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
first_hole = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
ignore=TitleFromPosition.ignore_language,
predicate=lambda hole: hole.value, index=0)
@@ -127,20 +128,33 @@ class PreferTitleWithYear(RemoveMatchRule):
priority = -255
def when(self, matches, context):
    """
    Select the title matches to remove when some titles come with a year.

    A title is kept when a ``year`` match is found inside the ``path`` marker it belongs to,
    and is scheduled for removal when its ``path`` marker holds no year. Titles that are
    not located in any ``path`` marker are left untouched.

    When the kept titles carry several distinct values, only the ones sharing the value of
    the reference title are kept: the reference is the first title found while walking
    the ``path`` markers in ``marker_sorted`` order.

    :param matches: matches to inspect
    :param context: not used by this rule
    :return: list of title matches to remove when at least one title has a year in its
        filepart, None otherwise
    """
    to_keep = []
    to_remove = []
    for title in matches.named('title'):
        filepart = matches.markers.at_match(title, lambda marker: marker.name == 'path', 0)
        if filepart:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                to_keep.append(title)
            else:
                to_remove.append(title)

    if to_keep:
        title_values = {title.value for title in to_keep}
        if len(title_values) > 1:
            # We have distinct values for title with year. Keep only values from most valuable filepart.
            fileparts = marker_sorted(matches.markers.named('path'), matches)
            best_title = None
            for filepart in fileparts:
                best_title = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title', 0)
                if best_title:
                    break
            # Guard against the case where no filepart yielded a title, and build the
            # removal list without mutating to_keep while iterating over it: removing
            # items from the list being looped over skips the element that follows
            # each removal, which let titles with a non-best value survive.
            if best_title:
                to_remove.extend(title for title in to_keep if title.value != best_title.value)
        return to_remove
TITLE = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
+10
View File
@@ -25,3 +25,13 @@
? "S03E04 102"
: season: 3
episodeNumber: 4
? +serie Saison 2 other
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Serie 2 other
? +serie Series 2 other
? +serie Season Two other
? +serie Season II other
: season: 2
+6
View File
@@ -73,3 +73,9 @@
season: 4
episodeNumber: 1
episodeTitle: Fun Run
? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
: title: Mad Men
season: 1
episodeNumber: 1
other: Complete
+6 -4
View File
@@ -2,6 +2,8 @@
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
import logging
logger = logging.getLogger(__name__)
from collections import OrderedDict
import babelfish
@@ -192,13 +194,13 @@ class TestYml(object):
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
entry = self.check(string, expected)
if entry.ok:
logging.debug(u'[' + filename + '] ' + six.text_type(entry))
logger.debug(u'[' + filename + '] ' + six.text_type(entry))
elif entry.warning:
logging.warning(u'[' + filename + '] ' + six.text_type(entry))
logger.warning(u'[' + filename + '] ' + six.text_type(entry))
elif entry.error:
logging.error(u'[' + filename + '] ' + six.text_type(entry))
logger.error(u'[' + filename + '] ' + six.text_type(entry))
for line in entry.details:
logging.error(u'[' + filename + '] ' + ' ' * 4 + line)
logger.error(u'[' + filename + '] ' + ' ' * 4 + line)
entries.append(entry)
entries.assert_ok()
+1 -1
View File
@@ -1,2 +1,2 @@
[pytest]
addopts = --ignore=setup.py --doctest-modules --doctest-glob='README.rst'
addopts =-s --ignore=setup.py --doctest-modules --doctest-glob='README.rst'