mirror of
https://github.com/zoriya/guessit.git
synced 2026-06-04 19:05:44 +00:00
Add more season/episodeNumber support
This commit is contained in:
@@ -5,6 +5,10 @@ Entry point module
|
||||
"""
|
||||
# pragma: no cover
|
||||
from __future__ import print_function
|
||||
|
||||
from rebulk import debug
|
||||
debug.DEBUG = True
|
||||
|
||||
from collections import OrderedDict
|
||||
import os
|
||||
import logging
|
||||
|
||||
@@ -6,6 +6,7 @@ Formatters
|
||||
|
||||
from . import seps
|
||||
import regex as re
|
||||
from rebulk.formatters import formatters
|
||||
|
||||
_excluded_clean_chars = ',:;-/\\'
|
||||
clean_chars = ""
|
||||
@@ -38,6 +39,17 @@ def strip(input_string):
|
||||
return input_string.strip(seps)
|
||||
|
||||
|
||||
def raw_cleanup(raw):
|
||||
"""
|
||||
Cleanup a raw value to perform raw comparison
|
||||
:param raw:
|
||||
:type raw:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return formatters(cleanup, strip)(raw.lower())
|
||||
|
||||
|
||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
|
||||
"""
|
||||
Reorder the title
|
||||
@@ -57,19 +69,3 @@ def reorder_title(title, articles=('the',), separators=(',', ', ')):
|
||||
if ltitle[-len(suffix):] == suffix:
|
||||
return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
|
||||
return title
|
||||
|
||||
|
||||
def chain(*formatters):
|
||||
"""
|
||||
Chain formatter functions
|
||||
:param functions:
|
||||
:type functions:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
def formatters_chain(input_string): # pylint:disable=missing-docstring
|
||||
for formatter in formatters:
|
||||
input_string = formatter(input_string)
|
||||
return input_string
|
||||
|
||||
return formatters_chain
|
||||
|
||||
+18
-10
@@ -10,7 +10,8 @@ from .common.comparators import marker_sorted
|
||||
|
||||
def prefer_last_path(matches):
|
||||
"""
|
||||
If multiple match are found, keep the one in the most valuable filepart.
|
||||
If multiple matches are found with the same name, keep the one in the most valuable filepart.
|
||||
Also keep other matches with the same value as those in the most valuable filepart.
|
||||
|
||||
:param matches:
|
||||
:param context:
|
||||
@@ -18,17 +19,24 @@ def prefer_last_path(matches):
|
||||
"""
|
||||
filepart = marker_sorted(matches.markers.named('path'), matches)[0]
|
||||
for name in matches.names:
|
||||
named_list = matches.named(name)
|
||||
if len(named_list) > 1:
|
||||
name_matches = matches.named(name)
|
||||
if len(name_matches) > 1:
|
||||
keep_list = []
|
||||
for named in named_list:
|
||||
marker = matches.markers.at_match(named, lambda marker: marker is filepart, 0)
|
||||
keep_values = []
|
||||
for name_match in name_matches:
|
||||
marker = matches.markers.at_match(name_match, lambda marker: marker is filepart, 0)
|
||||
if marker:
|
||||
keep_list.append(named)
|
||||
keep_list.append(name_match)
|
||||
keep_values.append(name_match.value)
|
||||
|
||||
for name_match in name_matches:
|
||||
if name_match not in keep_list and name_match.value in keep_values:
|
||||
keep_list.append(name_match)
|
||||
|
||||
if keep_list:
|
||||
for named in named_list:
|
||||
if named not in keep_list:
|
||||
matches.remove(named)
|
||||
for name_match in name_matches:
|
||||
if name_match not in keep_list:
|
||||
matches.remove(name_match)
|
||||
|
||||
|
||||
def enlarge_group_matches(matches):
|
||||
@@ -53,4 +61,4 @@ def enlarge_group_matches(matches):
|
||||
matches.append(match)
|
||||
|
||||
|
||||
PROCESSORS = Rebulk().processor(prefer_last_path, enlarge_group_matches)
|
||||
PROCESSORS = Rebulk().processor(enlarge_group_matches).post_processor(prefer_last_path)
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
Episode title
|
||||
"""
|
||||
from rebulk import Rebulk, AppendMatchRule
|
||||
from rebulk.formatters import formatters
|
||||
|
||||
from ..common.formatters import cleanup, reorder_title, chain
|
||||
from ..common.formatters import cleanup, reorder_title
|
||||
|
||||
|
||||
class EpisodeTitleFromPosition(AppendMatchRule):
|
||||
@@ -19,7 +20,7 @@ class EpisodeTitleFromPosition(AppendMatchRule):
|
||||
filename = matches.markers.named('path', -1)
|
||||
start, end = filename.span
|
||||
|
||||
holes = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
|
||||
holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
|
||||
predicate=lambda hole: hole.value)
|
||||
|
||||
for hole in holes:
|
||||
|
||||
@@ -9,13 +9,14 @@ from rebulk import Rebulk, RemoveMatchRule
|
||||
import regex as re
|
||||
from ..common.validators import seps_surround
|
||||
from guessit.rules.common import dash
|
||||
from ..common.numeral import numeral, parse_numeral
|
||||
|
||||
EPISODES = Rebulk().defaults(validate_all=True, validator={'__parent__': seps_surround})
|
||||
EPISODES.regex_defaults(flags=re.IGNORECASE, children=True)
|
||||
|
||||
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
|
||||
r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)',
|
||||
r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)',
|
||||
EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)', # 01x02
|
||||
r'S(?P<season>\d+)[ex](?P<episodeNumber>\d+)', # S01E02, S01x02
|
||||
r'S(?P<season>\d+)xe(?P<episodeNumber>\d+)', # S01Ex02
|
||||
formatter=int,
|
||||
private_parent=True,
|
||||
tags=['SxxExx'],
|
||||
@@ -24,20 +25,31 @@ EPISODES.regex(r'(?P<season>\d+)x(?P<episodeNumber>\d+)',
|
||||
and other.name == 'screenSize'
|
||||
else '__default__')
|
||||
|
||||
season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
|
||||
episode_words = ['episode', 'episodes']
|
||||
|
||||
EPISODES.regex(r'\L<season_words>-(?P<season>' + numeral + ')', season_words=season_words, # Season 1, # Season one
|
||||
abbreviations=[dash], formatter=parse_numeral)
|
||||
|
||||
season_markers = ['s']
|
||||
episode_markers = ['e', 'ep']
|
||||
|
||||
|
||||
no_zero_validator = {'__parent__': seps_surround,
|
||||
'season': lambda match: match.value > 0, 'episodeNumber': lambda match: match.value > 0}
|
||||
|
||||
|
||||
EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
|
||||
EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int)
|
||||
EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
|
||||
EPISODES.regex(r'(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int) # 12
|
||||
EPISODES.regex(r'0(?P<episodeNumber>\d{1,2})', tags=['bonus-conflict', 'weak-movie'], formatter=int) # 02, 012
|
||||
EPISODES.regex(r'(?P<episodeNumber>\d{3,4})', tags=['bonus-conflict', 'weak-movie'], formatter=int, # 112, 113
|
||||
validator=no_zero_validator,
|
||||
disabled=lambda context: not context.get('episode_prefer_number', False))
|
||||
|
||||
EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
|
||||
EPISODES.regex(r'(?P<season>\d{1})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], # 102
|
||||
formatter=int,
|
||||
validator=no_zero_validator,
|
||||
disabled=lambda context: context.get('episode_prefer_number', False))
|
||||
EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], formatter=int,
|
||||
EPISODES.regex(r'(?P<season>\d{2})(?P<episodeNumber>\d{2})', tags=['bonus-conflict', 'weak-movie'], # 0102
|
||||
formatter=int,
|
||||
validator=no_zero_validator,
|
||||
conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
|
||||
disabled=lambda context: context.get('episode_prefer_number', False))
|
||||
|
||||
@@ -11,6 +11,7 @@ import regex as re
|
||||
from ..common import dash
|
||||
from ..common import seps
|
||||
from ..common.validators import seps_surround
|
||||
from guessit.rules.common.formatters import raw_cleanup
|
||||
|
||||
OTHER = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
|
||||
OTHER.defaults(name="other", validator=seps_surround)
|
||||
@@ -88,9 +89,12 @@ def proper_count(matches):
|
||||
"""
|
||||
propers = matches.named('other', lambda match: match.value == 'Proper')
|
||||
if propers:
|
||||
raws = {} # Count distinct raw values
|
||||
for proper in propers:
|
||||
raws[raw_cleanup(proper.raw)] = proper
|
||||
proper_count_match = copy.copy(propers[-1])
|
||||
proper_count_match.name = 'properCount'
|
||||
proper_count_match.value = len(propers)
|
||||
proper_count_match.value = len(raws)
|
||||
matches.append(proper_count_match)
|
||||
|
||||
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
Title
|
||||
"""
|
||||
from rebulk import Rebulk, RemoveMatchRule, AppendRemoveMatchRule
|
||||
from rebulk.formatters import formatters
|
||||
|
||||
from ..common.formatters import cleanup, reorder_title, chain
|
||||
from ..common.formatters import cleanup, reorder_title
|
||||
from ..common.comparators import marker_sorted
|
||||
from ..common import seps
|
||||
from rebulk.rules import AppendRemoveMatchRule
|
||||
@@ -31,7 +32,7 @@ class TitleFromPosition(AppendRemoveMatchRule):
|
||||
"""
|
||||
start, end = filepart.span
|
||||
|
||||
first_hole = matches.holes(start, end + 1, formatter=chain(cleanup, reorder_title),
|
||||
first_hole = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
|
||||
ignore=TitleFromPosition.ignore_language,
|
||||
predicate=lambda hole: hole.value, index=0)
|
||||
|
||||
@@ -127,20 +128,33 @@ class PreferTitleWithYear(RemoveMatchRule):
|
||||
priority = -255
|
||||
|
||||
def when(self, matches, context):
|
||||
with_year = []
|
||||
without_year = []
|
||||
to_keep = []
|
||||
to_remove = []
|
||||
|
||||
for title in matches.named('title'):
|
||||
filepart = matches.markers.at_match(title, lambda marker: marker.name == 'path', 0)
|
||||
if filepart:
|
||||
year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
|
||||
if year_match:
|
||||
with_year.append(title)
|
||||
to_keep.append(title)
|
||||
else:
|
||||
without_year.append(title)
|
||||
to_remove.append(title)
|
||||
|
||||
if with_year:
|
||||
return without_year
|
||||
if to_keep:
|
||||
title_values = set([title.value for title in to_keep])
|
||||
if len(title_values) > 1:
|
||||
# We have distinct values for title with year. Keep only values from most valuable filepart.
|
||||
fileparts = marker_sorted(matches.markers.named('path'), matches)
|
||||
best_title = None
|
||||
for filepart in fileparts:
|
||||
best_title = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title', 0)
|
||||
if best_title:
|
||||
break
|
||||
for title in to_keep:
|
||||
if title.value != best_title.value:
|
||||
to_remove.append(title)
|
||||
to_keep.remove(title)
|
||||
return to_remove
|
||||
|
||||
|
||||
TITLE = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)
|
||||
|
||||
@@ -25,3 +25,13 @@
|
||||
? "S03E04 102"
|
||||
: season: 3
|
||||
episodeNumber: 4
|
||||
|
||||
? +serie Saison 2 other
|
||||
? +serie Season 2 other
|
||||
? +serie Saisons 2 other
|
||||
? +serie Seasons 2 other
|
||||
? +serie Serie 2 other
|
||||
? +serie Series 2 other
|
||||
? +serie Season Two other
|
||||
? +serie Season II other
|
||||
: season: 2
|
||||
|
||||
@@ -73,3 +73,9 @@
|
||||
season: 4
|
||||
episodeNumber: 1
|
||||
episodeTitle: Fun Run
|
||||
|
||||
? Series/Mad Men Season 1 Complete/Mad.Men.S01E01.avi
|
||||
: title: Mad Men
|
||||
season: 1
|
||||
episodeNumber: 1
|
||||
other: Complete
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import babelfish
|
||||
@@ -192,13 +194,13 @@ class TestYml(object):
|
||||
if not string_predicate or string_predicate(string): # pylint: disable=not-callable
|
||||
entry = self.check(string, expected)
|
||||
if entry.ok:
|
||||
logging.debug(u'[' + filename + '] ' + six.text_type(entry))
|
||||
logger.debug(u'[' + filename + '] ' + six.text_type(entry))
|
||||
elif entry.warning:
|
||||
logging.warning(u'[' + filename + '] ' + six.text_type(entry))
|
||||
logger.warning(u'[' + filename + '] ' + six.text_type(entry))
|
||||
elif entry.error:
|
||||
logging.error(u'[' + filename + '] ' + six.text_type(entry))
|
||||
logger.error(u'[' + filename + '] ' + six.text_type(entry))
|
||||
for line in entry.details:
|
||||
logging.error(u'[' + filename + '] ' + ' ' * 4 + line)
|
||||
logger.error(u'[' + filename + '] ' + ' ' * 4 + line)
|
||||
entries.append(entry)
|
||||
entries.assert_ok()
|
||||
|
||||
|
||||
+1
-1
@@ -1,2 +1,2 @@
|
||||
[pytest]
|
||||
addopts = --ignore=setup.py --doctest-modules --doctest-glob='README.rst'
|
||||
addopts =-s --ignore=setup.py --doctest-modules --doctest-glob='README.rst'
|
||||
|
||||
Reference in New Issue
Block a user