diff --git a/HISTORY.rst b/HISTORY.rst index 0c41b9c..2ce09c6 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,7 +4,7 @@ History 2.0rc8 (unreleased) ------------------- -- Nothing changed yet. +- Remove regex native module from required dependencies. It will now be used only if present. 2.0rc7 (2016-01-18) diff --git a/guessit/reutils.py b/guessit/reutils.py new file mode 100644 index 0000000..8048612 --- /dev/null +++ b/guessit/reutils.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Utils for re module +""" + +from rebulk.remodule import re + + +def build_or_pattern(patterns, escape=False): + """Build a or pattern string from a list of possible patterns + """ + or_pattern = [] + for pattern in patterns: + if not or_pattern: + or_pattern.append('(?:') + else: + or_pattern.append('|') + or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern) + or_pattern.append(')') + return ''.join(or_pattern) diff --git a/guessit/rules/common/date.py b/guessit/rules/common/date.py index 56b0987..6592274 100644 --- a/guessit/rules/common/date.py +++ b/guessit/rules/common/date.py @@ -5,10 +5,10 @@ Date """ from __future__ import unicode_literals -import regex as re - from dateutil import parser +from rebulk.remodule import re + _dsep = r'[-/ \.]' _dsep_bis = r'[-/ \.x]' diff --git a/guessit/rules/common/formatters.py b/guessit/rules/common/formatters.py index ca70811..91fb9bf 100644 --- a/guessit/rules/common/formatters.py +++ b/guessit/rules/common/formatters.py @@ -5,7 +5,7 @@ Formatters """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk.formatters import formatters diff --git a/guessit/rules/common/numeral.py b/guessit/rules/common/numeral.py index 8b868ea..0ab198e 100644 --- a/guessit/rules/common/numeral.py +++ b/guessit/rules/common/numeral.py @@ -5,7 +5,7 @@ parse numeral from various formats """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re digital_numeral = r'\d{1,4}' diff --git a/guessit/rules/common/words.py b/guessit/rules/common/words.py index e177b53..e0271aa 100644 --- a/guessit/rules/common/words.py +++ b/guessit/rules/common/words.py @@ -5,7 +5,7 @@ Words utils """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re _words_rexp = re.compile(r'\w+', re.UNICODE) diff --git a/guessit/rules/properties/audio_codec.py b/guessit/rules/properties/audio_codec.py index 309b216..36c29ef 100644 --- a/guessit/rules/properties/audio_codec.py +++ b/guessit/rules/properties/audio_codec.py @@ -5,7 +5,7 @@ audio_codec, audio_profile and audio_channels property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch from ..common import dash diff --git a/guessit/rules/properties/bonus.py b/guessit/rules/properties/bonus.py index 416de2b..e8721be 100644 --- a/guessit/rules/properties/bonus.py +++ b/guessit/rules/properties/bonus.py @@ -5,7 +5,7 @@ bonus property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, AppendMatch, Rule diff --git a/guessit/rules/properties/cds.py b/guessit/rules/properties/cds.py index e2d39b3..b6ab9d0 100644 --- a/guessit/rules/properties/cds.py +++ b/guessit/rules/properties/cds.py @@ -5,7 +5,7 @@ cd and cd_count properties """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk from ..common import dash diff --git a/guessit/rules/properties/container.py b/guessit/rules/properties/container.py index 6bf425d..ff39833 100644 --- a/guessit/rules/properties/container.py +++ b/guessit/rules/properties/container.py @@ -5,10 +5,11 @@ container property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re, REGEX_AVAILABLE from rebulk import Rebulk from ..common.validators import seps_surround +from ...reutils import build_or_pattern def container(): @@ -34,10 +35,16 @@ def container(): 'iso', 'vob'] torrent = ['torrent'] - rebulk.regex(r'\.\L$', exts=subtitles, tags=['extension', 'subtitle']) - rebulk.regex(r'\.\L$', exts=info, tags=['extension', 'info']) - rebulk.regex(r'\.\L$', exts=videos, tags=['extension', 'video']) - rebulk.regex(r'\.\L$', exts=torrent, tags=['extension', 'torrent']) + if REGEX_AVAILABLE: + rebulk.regex(r'\.\L$', exts=subtitles, tags=['extension', 'subtitle']) + rebulk.regex(r'\.\L$', exts=info, tags=['extension', 'info']) + rebulk.regex(r'\.\L$', exts=videos, tags=['extension', 'video']) + rebulk.regex(r'\.\L$', exts=torrent, tags=['extension', 'torrent']) + else: + rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle']) + rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info']) + rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video']) + rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent']) rebulk.defaults(name='container', validator=seps_surround, diff --git a/guessit/rules/properties/crc.py b/guessit/rules/properties/crc.py index 62275ca..493f2f4 100644 --- a/guessit/rules/properties/crc.py +++ b/guessit/rules/properties/crc.py @@ -5,7 +5,7 @@ crc and uuid properties """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk from ..common.validators import seps_surround diff --git a/guessit/rules/properties/edition.py b/guessit/rules/properties/edition.py index 93021a5..1bc37b5 100644 --- a/guessit/rules/properties/edition.py +++ b/guessit/rules/properties/edition.py @@ -5,7 +5,7 @@ edition property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk from ..common import dash diff --git a/guessit/rules/properties/episodes.py b/guessit/rules/properties/episodes.py index 87995f0..a6148f4 100644 --- a/guessit/rules/properties/episodes.py +++ b/guessit/rules/properties/episodes.py @@ -5,16 +5,17 @@ episode, season, episode_count, season_count and episode_details properties """ from __future__ import unicode_literals -from collections import defaultdict import copy - -import regex as re +from collections import defaultdict from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch +from rebulk.remodule import re, REGEX_AVAILABLE +from ...reutils import build_or_pattern + from .title import TitleFromPosition -from ..common.validators import seps_surround from ..common import dash, alt_dash from ..common.numeral import numeral, parse_numeral +from ..common.validators import seps_surround def episodes(): @@ -23,28 +24,51 @@ def episodes(): :return: Created Rebulk object :rtype: Rebulk """ + #pylint: disable=too-many-branches,too-many-statements rebulk = Rebulk() rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator']) # 01x02, 01x02x03x04 - rebulk.regex(r'(?P\d+)@?x@?(?P\d+)' + - r'(?:(?Px|-|\+|&)(?P\d+))*', - # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, SO1Ex02Ex03 - r'S(?P\d+)@?(?:xE|Ex|E|x)@?(?P\d+)' + - r'(?:(?PxE|Ex|E|x|-|\+|&)(?P\d+))*', - # S01 - r'S(?P\d+)' + - r'(?:(?PS|-|\+|&)(?P\d+))*', - formatter={'season': int, 'episode': int}, - tags=['SxxExx'], - abbreviations=[alt_dash], - children=True, - private_parent=True, - conflict_solver=lambda match, other: match - if match.name in ['season', 'episode'] and other.name in - ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] - else '__default__') + if REGEX_AVAILABLE: + rebulk.regex(r'(?P\d+)@?x@?(?P\d+)' + + r'(?:(?Px|-|\+|&)(?P\d+))*', + # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, SO1Ex02Ex03 + r'S(?P\d+)@?(?:xE|Ex|E|x)@?(?P\d+)' + + r'(?:(?PxE|Ex|E|x|-|\+|&)(?P\d+))*', + # S01 + r'S(?P\d+)' + + r'(?:(?PS|-|\+|&)(?P\d+))*', + formatter={'season': int, 'episode': int}, + tags=['SxxExx'], + abbreviations=[alt_dash], + children=True, + private_parent=True, + conflict_solver=lambda match, other: match + if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec', + 'audio_codec', 'audio_channels', + 'container', 'date'] + else '__default__') + else: + rebulk.chain(formatter={'season': int, 'episode': int}, + tags=['SxxExx'], + abbreviations=[alt_dash], + children=True, + private_parent=True, + conflict_solver=lambda match, other: match + if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec', + 'audio_codec', 'audio_channels', + 'container', 'date'] + else '__default__') \ + .defaults(validator=None)\ + .regex(r'S(?P\d+)@?(?:xE|Ex|E|x)@?(?P\d+)') \ + .regex(r'(?:(?PxE|Ex|E|x|-|\+|&)(?P\d+))').repeater('*') \ + .chain() \ + .regex(r'(?P\d+)@?x@?(?P\d+)') \ + .regex(r'(?:(?Px|-|\+|&)(?P\d+))').repeater('*') \ + .chain() \ + .regex(r'S(?P\d+)') \ + .regex(r'(?:(?PS|-|\+|&)(?P\d+))').repeater('*') # episode_details property for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'): @@ -59,95 +83,185 @@ def episodes(): of_words = ['of', 'sur'] all_words = ['All'] - rebulk.regex(r'\L@?(?P' + numeral + ')' + - r'(?:@?\L@?(?P' + numeral + '))?' + - r'(?:@?(?P-)@?(?P\d+))*' + - r'(?:@?(?P\+|&)@?(?P\d+))*', - of_words=of_words, - season_words=season_words, # Season 1, # Season one - abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) + if REGEX_AVAILABLE: + rebulk.regex(r'\L@?(?P' + numeral + ')' + + r'(?:@?\L@?(?P' + numeral + '))?' + + r'(?:@?(?P-)@?(?P\d+))*' + + r'(?:@?(?P\+|&)@?(?P\d+))*', + of_words=of_words, + season_words=season_words, # Season 1, # Season one + abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) + else: + rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})\ + .defaults(validator=None)\ + .regex(build_or_pattern(season_words) + '@?(?P' + numeral + ')') \ + .regex(r'' + build_or_pattern(of_words) + '@?(?P' + numeral + ')').repeater('?') \ + .regex(r'@?(?P-)@?(?P\d+)').repeater('*') \ + .regex(r'@?(?P\+|&)@?(?P\d+)').repeater('*') - rebulk.regex(r'\L-?(?P\d+)' + - r'(?:v(?P\d+))?' + - r'(?:-?\L?-?(?P\d+))?', - of_words=of_words, - episode_words=episode_words, # Episode 4 - abbreviations=[dash], formatter=int, - disabled=lambda context: context.get('type') == 'episode') + if REGEX_AVAILABLE: + rebulk.regex(r'\L-?(?P\d+)' + + r'(?:v(?P\d+))?' + + r'(?:-?\L?-?(?P\d+))?', + of_words=of_words, + episode_words=episode_words, # Episode 4 + abbreviations=[dash], formatter=int, + disabled=lambda context: context.get('type') == 'episode') + else: + rebulk.regex(build_or_pattern(episode_words) + r'-?(?P\d+)' + + r'(?:v(?P\d+))?' + + r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P\d+))?', # Episode 4 + abbreviations=[dash], formatter=int, + disabled=lambda context: context.get('type') == 'episode') - rebulk.regex(r'\L-?(?P' + numeral + ')' + - r'(?:v(?P\d+))?' + - r'(?:-?\L?-?(?P\d+))?', - of_words=of_words, - episode_words=episode_words, # Episode 4 - abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int}, - disabled=lambda context: context.get('type') != 'episode') + if REGEX_AVAILABLE: + rebulk.regex(r'\L-?(?P' + numeral + ')' + + r'(?:v(?P\d+))?' + + r'(?:-?\L?-?(?P\d+))?', + of_words=of_words, + episode_words=episode_words, # Episode 4 + abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int}, + disabled=lambda context: context.get('type') != 'episode') + else: + rebulk.regex(build_or_pattern(episode_words) + r'-?(?P' + numeral + ')' + + r'(?:v(?P\d+))?' + + r'(?:-?'+ build_or_pattern(of_words) + r'?-?(?P\d+))?', # Episode 4 + abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int}, + disabled=lambda context: context.get('type') != 'episode') - rebulk.regex(r'S?(?P\d+)-?(?:xE|Ex|E|x)-?(?P\L)', - tags=['SxxExx'], - all_words=all_words, - abbreviations=[dash], - validator=None, - formatter={'season': int, 'other': lambda match: 'Complete'}) + if REGEX_AVAILABLE: + rebulk.regex(r'S?(?P\d+)-?(?:xE|Ex|E|x)-?(?P\L)', + tags=['SxxExx'], + all_words=all_words, + abbreviations=[dash], + validator=None, + formatter={'season': int, 'other': lambda match: 'Complete'}) + else: + rebulk.regex(r'S?(?P\d+)-?(?:xE|Ex|E|x)-?(?P'+build_or_pattern(all_words)+')', + tags=['SxxExx'], + abbreviations=[dash], + validator=None, + formatter={'season': int, 'other': lambda match: 'Complete'}) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True) - # 12, 13 - rebulk.regex(r'(?P\d{2})' + - r'(?:v(?P\d+))?' + - r'(?:(?P[x-])(?P\d{2}))*', - tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) + if REGEX_AVAILABLE: + # 12, 13 + rebulk.regex(r'(?P\d{2})' + + r'(?:v(?P\d+))?' + + r'(?:(?P[x-])(?P\d{2}))*', + tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) + else: + rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \ + .defaults(validator=None) \ + .regex(r'(?P\d{2})')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?P[x-])(?P\d{2})').repeater('*') - # 012, 013 - rebulk.regex(r'0(?P\d{1,2})' + - r'(?:v(?P\d+))?' + - r'(?:(?P[x-])0(?P\d{1,2}))*', - tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) + if REGEX_AVAILABLE: + # 012, 013 + rebulk.regex(r'0(?P\d{1,2})' + + r'(?:v(?P\d+))?' + + r'(?:(?P[x-])0(?P\d{1,2}))*', + tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) + else: + rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \ + .defaults(validator=None) \ + .regex(r'0(?P\d{1,2})') \ + .regex(r'v(?P\d+)').repeater('?') \ + .regex(r'(?P[x-])0(?P\d{1,2})').repeater('*') - # 112, 113 - rebulk.regex(r'(?P\d{3,4})' + - r'(?:v(?P\d+))?' + - r'(?:(?P[x-])(?P\d{3,4}))*', - tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, - disabled=lambda context: not context.get('episode_prefer_number', False)) + if REGEX_AVAILABLE: + # 112, 113 + rebulk.regex(r'(?P\d{3,4})' + + r'(?:v(?P\d+))?' + + r'(?:(?P[x-])(?P\d{3,4}))*', + tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, + disabled=lambda context: not context.get('episode_prefer_number', False)) + else: + rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, + disabled=lambda context: not context.get('episode_prefer_number', False)) \ + .defaults(validator=None) \ + .regex(r'(?P\d{3,4})')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?P[x-])(?P\d{3,4})').repeater('*') - # 1, 2, 3 - rebulk.regex(r'(?P\d)' + - r'(?:v(?P\d+))?' + - r'(?:(?P[x-])(?P\d{1,2}))*', - tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, - disabled=lambda context: context.get('type') != 'episode') + if REGEX_AVAILABLE: + # 1, 2, 3 + rebulk.regex(r'(?P\d)' + + r'(?:v(?P\d+))?' + + r'(?:(?P[x-])(?P\d{1,2}))*', + tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, + disabled=lambda context: context.get('type') != 'episode') + else: + rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}, + disabled=lambda context: context.get('type') != 'episode') \ + .defaults(validator=None) \ + .regex(r'(?P\d)')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?P[x-])(?P\d{1,2})').repeater('*') # e112, e113 - rebulk.regex(r'e(?P\d{1,4})' + - r'(?:v(?P\d+))?' + - r'(?:(?Pe|x|-)(?P\d{1,4}))*', - formatter={'episode': int, 'version': int}) + if REGEX_AVAILABLE: + rebulk.regex(r'e(?P\d{1,4})' + + r'(?:v(?P\d+))?' + + r'(?:(?Pe|x|-)(?P\d{1,4}))*', + formatter={'episode': int, 'version': int}) + else: + rebulk.chain(formatter={'episode': int, 'version': int}) \ + .defaults(validator=None) \ + .regex(r'e(?P\d{1,4})')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?Pe|x|-)(?P\d{1,4})').repeater('*') # ep 112, ep113, ep112, ep113 - rebulk.regex(r'ep-?(?P\d{1,4})' + - r'(?:v(?P\d+))?' + - r'(?:(?Pep|e|x|-)(?P\d{1,4}))*', - abbreviations=[dash], - formatter={'episode': int, 'version': int}) + if REGEX_AVAILABLE: + rebulk.regex(r'ep-?(?P\d{1,4})' + + r'(?:v(?P\d+))?' + + r'(?:(?Pep|e|x|-)(?P\d{1,4}))*', + abbreviations=[dash], + formatter={'episode': int, 'version': int}) + else: + rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \ + .defaults(validator=None) \ + .regex(r'ep-?(?P\d{1,4})')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?Pep|e|x|-)(?P\d{1,4})').repeater('*') # 102, 0102 - rebulk.regex(r'(?P\d{1,2})(?P\d{2})' + - r'(?:v(?P\d+))?' + - r'(?:(?Px|-)(?P\d{2}))*', - tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'], - formatter={'season': int, 'episode': int, 'version': int}, - conflict_solver=lambda match, other: match if other.name == 'year' else '__default__', - disabled=lambda context: context.get('episode_prefer_number', False)) + if REGEX_AVAILABLE: + rebulk.regex(r'(?P\d{1,2})(?P\d{2})' + + r'(?:v(?P\d+))?' + + r'(?:(?Px|-)(?P\d{2}))*', + tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'], + formatter={'season': int, 'episode': int, 'version': int}, + conflict_solver=lambda match, other: match if other.name == 'year' else '__default__', + disabled=lambda context: context.get('episode_prefer_number', False)) + else: + rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'], + formatter={'season': int, 'episode': int, 'version': int}, + conflict_solver=lambda match, other: match if other.name == 'year' else '__default__', + disabled=lambda context: context.get('episode_prefer_number', False))\ + .defaults(validator=None)\ + .regex(r'(?P\d{1,2})(?P\d{2})')\ + .regex(r'v(?P\d+)').repeater('?')\ + .regex(r'(?Px|-)(?P\d{2})').repeater('*') rebulk.regex(r'v(?P\d+)', children=True, private_parent=True, formatter=int) rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator']) + #TODO: List of words # detached of X count (season/episode) - rebulk.regex(r'(?P\d+)?-?\L-?(?P\d+)-?\L?', of_words=of_words, - episode_words=episode_words, abbreviations=[dash], children=True, private_parent=True, formatter=int) + if REGEX_AVAILABLE: + rebulk.regex(r'(?P\d+)?-?\L-?(?P\d+)-?\L?', of_words=of_words, + episode_words=episode_words, abbreviations=[dash], children=True, private_parent=True, + formatter=int) + else: + rebulk.regex(r'(?P\d+)?-?' + build_or_pattern(of_words) + + r'-?(?P\d+)-?' + build_or_pattern(episode_words) + '?', + abbreviations=[dash], children=True, private_parent=True, formatter=int) rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode") diff --git a/guessit/rules/properties/film.py b/guessit/rules/properties/film.py index 14a8873..e0d8eae 100644 --- a/guessit/rules/properties/film.py +++ b/guessit/rules/properties/film.py @@ -5,7 +5,7 @@ film property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, AppendMatch, Rule from ..common.formatters import cleanup diff --git a/guessit/rules/properties/format.py b/guessit/rules/properties/format.py index d250a8b..8b7430d 100644 --- a/guessit/rules/properties/format.py +++ b/guessit/rules/properties/format.py @@ -5,7 +5,7 @@ format property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, RemoveMatch, Rule from ..common import dash diff --git a/guessit/rules/properties/language.py b/guessit/rules/properties/language.py index c42b2e1..d1e882c 100644 --- a/guessit/rules/properties/language.py +++ b/guessit/rules/properties/language.py @@ -8,9 +8,9 @@ from __future__ import unicode_literals import copy -import regex as re import babelfish +from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch from ..common.words import iter_words, COMMON_WORDS from ..common.validators import seps_surround diff --git a/guessit/rules/properties/other.py b/guessit/rules/properties/other.py index e5d7b49..1e8808c 100644 --- a/guessit/rules/properties/other.py +++ b/guessit/rules/properties/other.py @@ -7,7 +7,7 @@ from __future__ import unicode_literals import copy -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch, POST_PROCESS, AppendMatch from ..common import dash diff --git a/guessit/rules/properties/part.py b/guessit/rules/properties/part.py index 483b86e..f71132c 100644 --- a/guessit/rules/properties/part.py +++ b/guessit/rules/properties/part.py @@ -5,12 +5,13 @@ part property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re, REGEX_AVAILABLE from rebulk import Rebulk from ..common import dash from ..common.validators import seps_surround from ..common.numeral import numeral, parse_numeral +from ...reutils import build_or_pattern def part(): @@ -23,7 +24,11 @@ def part(): prefixes = ['pt', 'part'] - rebulk.regex(r'\L-?(' + numeral + r')', prefixes=prefixes, - name='part', validate_all=True, private_parent=True, children=True, formatter=parse_numeral) + if REGEX_AVAILABLE: + rebulk.regex(r'\L-?(' + numeral + r')', prefixes=prefixes, + name='part', validate_all=True, private_parent=True, children=True, formatter=parse_numeral) + else: + rebulk.regex(build_or_pattern(prefixes) + r'-?(' + numeral + r')', prefixes=prefixes, + name='part', validate_all=True, private_parent=True, children=True, formatter=parse_numeral) return rebulk diff --git a/guessit/rules/properties/release_group.py b/guessit/rules/properties/release_group.py index 0802f49..b3e0bf1 100644 --- a/guessit/rules/properties/release_group.py +++ b/guessit/rules/properties/release_group.py @@ -7,7 +7,7 @@ from __future__ import unicode_literals import copy -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, Rule, AppendMatch from ..common.validators import int_coercable diff --git a/guessit/rules/properties/screen_size.py b/guessit/rules/properties/screen_size.py index d5dbf2e..3f34966 100644 --- a/guessit/rules/properties/screen_size.py +++ b/guessit/rules/properties/screen_size.py @@ -5,7 +5,7 @@ screen_size property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch from ..common.validators import seps_surround diff --git a/guessit/rules/properties/video_codec.py b/guessit/rules/properties/video_codec.py index f3591b5..51139ae 100644 --- a/guessit/rules/properties/video_codec.py +++ b/guessit/rules/properties/video_codec.py @@ -5,7 +5,7 @@ video_codec and video_profile property """ from __future__ import unicode_literals -import regex as re +from rebulk.remodule import re from rebulk import Rebulk, Rule, RemoveMatch from ..common import dash diff --git a/guessit/rules/properties/website.py b/guessit/rules/properties/website.py index 8040ad7..30c1fe0 100644 --- a/guessit/rules/properties/website.py +++ b/guessit/rules/properties/website.py @@ -6,9 +6,10 @@ Website property. from __future__ import unicode_literals from pkg_resources import resource_stream # @UnresolvedImport -import regex as re +from rebulk.remodule import re, REGEX_AVAILABLE from rebulk import Rebulk +from ...reutils import build_or_pattern def website(): @@ -28,12 +29,27 @@ def website(): safe_subdomains = ['www'] # For sure a website subdomain safe_prefix = ['co', 'com', 'org', 'net'] # Those words before a tlds are sure - rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L\.)+(?:[a-z-]+\.)+(?:\L))(?:[^a-z0-9]|$)', - safe_subdomains=safe_subdomains, tlds=tlds, children=True) - rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L\.)*[a-z-]+\.(?:\L))(?:[^a-z0-9]|$)', - safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True) - rebulk.regex( - r'(?:[^a-z0-9]|^)((?:\L\.)*[a-z-]+\.(?:\L\.)+(?:\L))(?:[^a-z0-9]|$)', - safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True) + if REGEX_AVAILABLE: + rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L\.)+(?:[a-z-]+\.)+(?:\L))(?:[^a-z0-9]|$)', + safe_subdomains=safe_subdomains, tlds=tlds, children=True) + rebulk.regex(r'(?:[^a-z0-9]|^)((?:\L\.)*[a-z-]+\.(?:\L))(?:[^a-z0-9]|$)', + safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True) + rebulk.regex( + r'(?:[^a-z0-9]|^)((?:\L\.)*[a-z-]+\.(?:\L\.)+(?:\L))(?:[^a-z0-9]|$)', + safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True) + else: + rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + + r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) + + r'))(?:[^a-z0-9]|$)', + children=True) + rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + + r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) + + r'))(?:[^a-z0-9]|$)', + safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True) + rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) + + r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) + + r'\.)+(?:'+build_or_pattern(tlds) + + r'))(?:[^a-z0-9]|$)', + safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True) return rebulk diff --git a/guessit/test/test_yml.py b/guessit/test/test_yml.py index ccd78e1..ecb8f8e 100644 --- a/guessit/test/test_yml.py +++ b/guessit/test/test_yml.py @@ -10,11 +10,10 @@ import yaml import six -import regex as re - import babelfish import pytest +from rebulk.remodule import re from rebulk.utils import is_iterable from guessit.options import parse_options diff --git a/setup.py b/setup.py index 6720376..7e583cf 100644 --- a/setup.py +++ b/setup.py @@ -16,11 +16,13 @@ with io.open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: with io.open(os.path.join(here, 'HISTORY.rst'), encoding='utf-8') as f: history = f.read() -install_requires = ['rebulk>=0.6.4', 'regex', 'babelfish>=0.5.5', 'python-dateutil'] +install_requires = ['rebulk>=0.7.0', 'babelfish>=0.5.5', 'python-dateutil'] if sys.version_info < (2, 7): install_requires.extend(['argparse', 'ordereddict']) setup_requires = ['pytest-runner'] +native_require = ['regex'] + dev_require = ['zest.releaser[recommended]', 'pylint', 'tox', 'sphinx', 'sphinx-autobuild'] tests_require = ['pytest>=2.7.3', 'pytest-benchmark', 'pytest-capturelog', 'PyYAML'] @@ -68,7 +70,8 @@ args = dict(name='guessit', zip_safe=True, extras_require={ 'test': tests_require, - 'dev': dev_require + 'dev': dev_require, + 'native': native_require }) setup(**args)