diff --git a/docs/properties.rst b/docs/properties.rst index 38a97a3..6ce5833 100644 --- a/docs/properties.rst +++ b/docs/properties.rst @@ -120,7 +120,7 @@ Video properties Format of the initial source - - ``TV`` ``HDTV`` ``AHDTV`` ``HDTC`` ``SATRip`` ``WEB-DL`` ``VOD`` ``BluRay`` ``DVD`` ``WEBRip`` ``Workprint`` ``Telecine`` ``VHS`` ``DVB`` ``Telesync`` ``HD-DVD`` ``PPV`` ``Cam`` + - ``TV`` ``HDTV`` ``UHDTV`` ``AHDTV`` ``HDTC`` ``SATRip`` ``WEB-DL`` ``VOD`` ``BluRay`` ``DVD`` ``WEBRip`` ``Workprint`` ``Telecine`` ``VHS`` ``DVB`` ``Telesync`` ``HD-DVD`` ``PPV`` ``Cam`` - **screen_size** @@ -253,7 +253,7 @@ Other properties Other property will appear under this property. - - ``Fansub``, ``HR``, ``HQ``, ``Screener``, ``Unrated``, ``HD``, ``3D``, ``SyncFix``, ``Bonus``, + - ``Fansub``, ``HR``, ``HQ``, ``Screener``, ``Unrated``, ``HD``, ``UltraHD``, ``3D``, ``SyncFix``, ``Bonus``, ``WideScreen``, ``Fastsub``, ``R5``, ``AudioFix``, ``DDC``, ``Trailer``, ``Complete``, ``Limited``, ``Classic``, ``Proper``, ``DualAudio``, ``LiNE``, ``LD``, ``MD``, ``XXX``, ``Remastered``, ``Extended``, ``Extended Cut``, ``Uncut``, ``Retail``, ``ReEncoded``, ``Mux``, ``Hardcoded Subtitles`` diff --git a/guessit/rules/properties/audio_codec.py b/guessit/rules/properties/audio_codec.py index e1e712c..79a6e8e 100644 --- a/guessit/rules/properties/audio_codec.py +++ b/guessit/rules/properties/audio_codec.py @@ -58,12 +58,15 @@ def audio_codec(): rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True) rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True) rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True) + rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels') + rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels') + rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels') rebulk.string('7ch', '8ch', value='7.1') rebulk.string('5ch', '6ch', 
value='5.1') rebulk.string('2ch', 'stereo', value='2.0') rebulk.string('1ch', 'mono', value='1.0') - rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule) + rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule) return rebulk @@ -162,3 +165,22 @@ class HqConflictRule(Rule): if hq_other: return hq_other + + +class AudioChannelsValidatorRule(Rule): + """ + Remove audio_channel if no audio codec as previous match. + """ + priority = 128 + consequence = RemoveMatch + + def when(self, matches, context): + ret = [] + + for audio_channel in matches.tagged('weak-audio_channels'): + valid_before = matches.range(audio_channel.start - 1, audio_channel.start, + lambda match: match.name == 'audio_codec') + if not valid_before: + ret.append(audio_channel) + + return ret diff --git a/guessit/rules/properties/episodes.py b/guessit/rules/properties/episodes.py index 6d0abe6..1fb87f3 100644 --- a/guessit/rules/properties/episodes.py +++ b/guessit/rules/properties/episodes.py @@ -58,10 +58,12 @@ def episodes(): :return: """ if match.name == 'episode' and other.name in \ - ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year']: + ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year'] \ + and 'weak-audio_channels' not in other.tags: return match if match.name == 'season' and other.name in \ - ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']: + ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] \ + and 'weak-audio_channels' not in other.tags: return match if match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \ and match.initiator != other.initiator: diff --git a/guessit/rules/properties/format.py b/guessit/rules/properties/format.py index 4974b39..83a9a2f 100644 --- a/guessit/rules/properties/format.py +++ b/guessit/rules/properties/format.py @@ 
-17,7 +17,7 @@ def format_(): :rtype: Rebulk """ rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) - rebulk.defaults(name="format", tags='video-codec-prefix') + rebulk.defaults(name="format", tags=['video-codec-prefix', 'streaming_service.suffix']) rebulk.regex("VHS", "VHS-?Rip", value="VHS") rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam") @@ -31,13 +31,16 @@ def format_(): rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", # "DVD-?R(?:$|^E)" => DVD-Real ... "DVD-?9", "DVD-?5", value="DVD") - rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV") + rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV", + conflict_solver=lambda match, other: other if other.name == 'other' else '__default__') rebulk.regex("VOD", "VOD-?Rip", value="VOD") rebulk.regex("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap", value="WEBRip") rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", "DL-?WEB", "DL(?=-?Mux)", value="WEB-DL") rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD") rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay") rebulk.regex("AHDTV", value="AHDTV") + rebulk.regex('UHD-?TV', 'UHD-?Rip', value='UHDTV', + conflict_solver=lambda match, other: other if other.name == 'other' else '__default__') rebulk.regex("HDTC", value="HDTC") rebulk.regex("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip", value="SATRip") diff --git a/guessit/rules/properties/other.py b/guessit/rules/properties/other.py index 28e44a9..fd85204 100644 --- a/guessit/rules/properties/other.py +++ b/guessit/rules/properties/other.py @@ -10,7 +10,7 @@ from rebulk.remodule import re from ..common import dash from ..common import seps -from ..common.validators import seps_after, seps_surround, compose +from ..common.validators import seps_after, seps_before, seps_surround, compose from ...reutils import build_or_pattern from ...rules.common.formatters import raw_cleanup @@ -63,10 +63,15 @@ 
def other(): rebulk.regex('(?:PS-?)?Vita', value='PS Vita') for value in ( - 'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC', + 'Screener', 'Remux', 'Remastered', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC', 'CC', 'LD', 'MD', 'XXX'): rebulk.string(value, value=value) + rebulk.string('LDTV', value='LD') + rebulk.string('HD', value='HD', validator=None, + tags=['streaming_service.prefix', 'streaming_service.suffix']) + rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None, + tags=['streaming_service.prefix', 'streaming_service.suffix']) for value in ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut', 'Extended', 'Extended Cut'): @@ -82,7 +87,7 @@ def other(): rebulk.string('HC', value='Hardcoded Subtitles') rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule, - ValidateMuxRule, ValidateHardcodedSubs, ProperCountRule) + ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, ProperCountRule) return rebulk @@ -227,3 +232,36 @@ class ValidateHardcodedSubs(Rule): to_remove.append(hc_match) return to_remove + + +class ValidateStreamingServiceNeighbor(Rule): + """Validate streaming service's neighbors.""" + + priority = 32 + consequence = RemoveMatch + + def when(self, matches, context): + to_remove = [] + for match in matches.named('other', + predicate=lambda m: ('streaming_service.prefix' in m.tags or + 'streaming_service.suffix' in m.tags)): + + if not seps_after(match): + if 'streaming_service.prefix' in match.tags: + next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0) + if next_match and not matches.holes(match.end, next_match.start, + predicate=lambda m: m.value.strip(seps)): + continue + + to_remove.append(match) + + elif not seps_before(match): + if 'streaming_service.suffix' in match.tags: + previous_match = 
matches.previous(match, lambda m: m.name == 'streaming_service', 0) + if previous_match and not matches.holes(previous_match.end, match.start, + predicate=lambda m: m.value.strip(seps)): + continue + + to_remove.append(match) + + return to_remove diff --git a/guessit/rules/properties/screen_size.py b/guessit/rules/properties/screen_size.py index 03ee5af..b7732ab 100644 --- a/guessit/rules/properties/screen_size.py +++ b/guessit/rules/properties/screen_size.py @@ -29,7 +29,7 @@ def screen_size(): return other return '__default__' - rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE) + rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE) rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver) rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p") @@ -45,6 +45,7 @@ def screen_size(): rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p") rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p") rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K") + rebulk.string('4k', value='4K') _digits_re = re.compile(r'\d+') diff --git a/guessit/rules/properties/streaming_service.py b/guessit/rules/properties/streaming_service.py index 1e48108..b31690d 100644 --- a/guessit/rules/properties/streaming_service.py +++ b/guessit/rules/properties/streaming_service.py @@ -9,7 +9,6 @@ from rebulk import Rebulk from rebulk.rules import Rule, RemoveMatch from ...rules.common import seps, dash -from ...rules.common.validators import seps_surround def streaming_service(): @@ -19,7 +18,7 @@ def streaming_service(): :rtype: Rebulk """ rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) - rebulk.defaults(name='streaming_service', validator=seps_surround) + rebulk.defaults(name='streaming_service', tags=['format-prefix']) rebulk.string('AE', 'A&E', value='A&E') rebulk.string('AMBC', value='ABC') @@ -55,6 +54,7 @@ def 
streaming_service(): rebulk.string('NFL', value='NFL') rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon') rebulk.string('NF', 'Netflix', value='Netflix') + rebulk.string('iTunes', value='iTunes') rebulk.string('RTE', value='RTÉ One') rebulk.string('SESO', 'SeeSo', value='SeeSo') rebulk.string('SPKE', 'SpikeTV', 'Spike TV', value='Spike TV') @@ -89,14 +89,20 @@ class ValidateStreamingService(Rule): """ to_remove = [] for service in matches.named('streaming_service'): - next_match = matches.next(service, predicate=lambda match: match.name == 'format', index=0) - if next_match and not matches.holes(service.end, next_match.start, - predicate=lambda match: match.value.strip(seps)): - if service.value == 'Comedy Central': - # Current match is a valid streaming service, removing invalid closed caption (CC) matches - to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC')) + next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0) + previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0) + has_other = service.initiator and service.initiator.children.named('other') + + if not has_other and \ + (not next_match or matches.holes(service.end, next_match.start, + predicate=lambda match: match.value.strip(seps))) and \ + (not previous_match or matches.holes(previous_match.end, service.start, + predicate=lambda match: match.value.strip(seps))): + to_remove.append(service) continue - to_remove.append(service) + if service.value == 'Comedy Central': + # Current match is a valid streaming service, removing invalid closed caption (CC) matches + to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC')) return to_remove diff --git a/guessit/rules/properties/video_codec.py b/guessit/rules/properties/video_codec.py index 909de75..8666146 100644 --- a/guessit/rules/properties/video_codec.py +++ b/guessit/rules/properties/video_codec.py @@ -18,7 
+18,7 @@ def video_codec(): :rtype: Rebulk """ rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True) - rebulk.defaults(name="video_codec", tags='format-suffix') + rebulk.defaults(name="video_codec", tags=['format-suffix', 'streaming_service.suffix']) rebulk.regex(r"Rv\d{2}", value="Real") rebulk.regex("Mpeg2", value="Mpeg2") @@ -26,12 +26,14 @@ def video_codec(): rebulk.regex("XviD", value="XviD") rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVC(?:HD)?", value="h264") rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265") + rebulk.regex('(?P<video_codec>hevc)(?P<video_profile>10)', value={'video_codec': 'h265', 'video_profile': '10bit'}, + tags=['video-codec-suffix'], children=True) # http://blog.mediacoderhq.com/h264-profiles-and-levels/ # http://fr.wikipedia.org/wiki/H.264 rebulk.defaults(name="video_profile", validator=seps_surround) - rebulk.regex('10.?bits?', 'Hi10P?', value='10bit') + rebulk.regex('10.?bits?', 'Hi10P?', 'YUV420P10', value='10bit') rebulk.regex('8.?bits?', value='8bit') rebulk.string('BP', value='BP', tags='video_profile.rule') @@ -62,7 +64,8 @@ class ValidateVideoCodec(Rule): not matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags): ret.append(codec) continue - if not seps_after(codec): + if not seps_after(codec) and \ + not matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags): ret.append(codec) continue return ret diff --git a/guessit/test/episodes.yml b/guessit/test/episodes.yml index 0e7f6bb..479c481 100644 --- a/guessit/test/episodes.yml +++ b/guessit/test/episodes.yml @@ -3575,3 +3575,77 @@ language: und crc32: EDA6E7F1 type: episode + +# NetflixUHD +? Show.Name.S01E06.NetflixUHD +: title: Show Name + season: 1 + episode: 6 + streaming_service: Netflix + other: UltraHD + type: episode + +?
Show.Name.S04E13.FINAL.MULTI.DD51.2160p.NetflixUHDRip.x265-TVS +: title: Show Name + season: 4 + episode: 13 + other: FINAL + language: mul + audio_codec: DolbyDigital + audio_channels: '5.1' + screen_size: 4K + streaming_service: Netflix + format: UHDTV + video_codec: h265 + release_group: TVS + type: episode + +? Show.Name.S06E11.Of.Late.I.Think.of.Rosewood.iTunesHD.x264 +: title: Show Name + season: 6 + episode: 11 + episode_title: Of Late I Think of Rosewood + streaming_service: iTunes + other: HD + video_codec: h264 + type: episode + +? Show.Name.S01.720p.iTunes.h264-Group +: title: Show Name + season: 1 + screen_size: 720p + streaming_service: iTunes + video_codec: h264 + release_group: Group + type: episode + +? Show.Name.1x01.eps1.0.hellofriend.(HDiTunes.Ac3.Esp).(2015).By.Malaguita.avi +: title: Show Name + season: 1 + episode: 1 + episode_title: eps1 0 hellofriend + other: HD + streaming_service: iTunes + audio_codec: AC3 + language: spa + year: 2015 + container: avi + type: episode + +? "[Hanamaru&LoliHouse] The Dragon Dentist - 01 [WebRip 1920x1080 HEVC-yuv420p10 AAC].mkv" +: release_group: Hanamaru&LoliHouse + title: The Dragon Dentist + episode: 1 + format: WEBRip + screen_size: 1080p + video_codec: h265 + video_profile: 10bit + audio_codec: AAC + container: mkv + type: episode + +? Show Name - Season 1 Episode 50 +: title: Show Name + season: 1 + episode: 50 + type: episode diff --git a/guessit/test/movies.yml b/guessit/test/movies.yml index 9d8f973..19854fc 100644 --- a/guessit/test/movies.yml +++ b/guessit/test/movies.yml @@ -855,4 +855,50 @@ ? Mad Max Beyond Thunderdome () : title: Mad Max Beyond Thunderdome - type: movie \ No newline at end of file + type: movie + +? 
Hacksaw Ridge 2016 Multi 2160p UHD BluRay Hevc10 HDR10 DTSHD & ATMOS 7.1 -DDR.mkv +: title: Hacksaw Ridge + year: 2016 + language: mul + screen_size: 4K + other: UltraHD + format: BluRay + video_codec: h265 + video_profile: 10bit + audio_codec: [DTS, DolbyAtmos] + audio_profile: HD + audio_channels: '7.1' + release_group: DDR + container: mkv + type: movie + +? Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4 +: title: Special Correspondents + year: 2016 + language: [it, en] + screen_size: 4K + streaming_service: Netflix + other: UltraHD + release_group: TeamPremium + container: mp4 + type: movie + +? -Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4 +: alternative_title: 4K + +? -Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4 +: alternative_title: 2160p + +? Suicide Squad EXTENDED (2016) 2160p 4K UltraHD Blu-Ray x265 (HEVC 10bit BT709) Dolby Atmos 7.1 -DDR +: title: Suicide Squad + other: [Extended, UltraHD] + year: 2016 + screen_size: 4K + format: BluRay + video_codec: h265 + video_profile: 10bit + audio_codec: DolbyAtmos + audio_channels: '7.1' + release_group: DDR + type: movie diff --git a/guessit/test/rules/audio_codec.yml b/guessit/test/rules/audio_codec.yml index 22a6d0e..1d94205 100644 --- a/guessit/test/rules/audio_codec.yml +++ b/guessit/test/rules/audio_codec.yml @@ -36,6 +36,12 @@ ? +trueHD : audio_codec: TrueHD +? +True-HD51 +? +trueHD51 +: audio_codec: TrueHD + audio_channels: '5.1' + + ? +DTS-HD : audio_codec: DTS audio_profile: HD @@ -57,6 +63,7 @@ audio_profile: LC ? +AAC2.0 +? +AAC20 : audio_codec: AAC audio_channels: '2.0' @@ -80,5 +87,9 @@ : audio_channels: '1.0' ? DD5.1 +? DD51 : audio_codec: DolbyDigital audio_channels: '5.1' + +? -51 +: audio_channels: '5.1' diff --git a/guessit/test/rules/episodes.yml b/guessit/test/rules/episodes.yml index e631ee5..29e59c6 100644 --- a/guessit/test/rules/episodes.yml +++ b/guessit/test/rules/episodes.yml @@ -249,4 +249,24 @@ ? 
epi : options: -t episode - title: epi \ No newline at end of file + title: epi + +? Episode20 +? Episode 20 +: episode: 20 + +? Episode50 +? Episode 50 +: episode: 50 + +? Episode51 +? Episode 51 +: episode: 51 + +? Episode70 +? Episode 70 +: episode: 70 + +? Episode71 +? Episode 71 +: episode: 71 \ No newline at end of file diff --git a/guessit/test/rules/format.yml b/guessit/test/rules/format.yml index 3deefe8..e983cfb 100644 --- a/guessit/test/rules/format.yml +++ b/guessit/test/rules/format.yml @@ -132,3 +132,7 @@ ? HDTC : format: HDTC + +? UHDTV +? UHDRip +: format: UHDTV diff --git a/guessit/test/rules/other.yml b/guessit/test/rules/other.yml index 7f0e8c0..9ee902b 100644 --- a/guessit/test/rules/other.yml +++ b/guessit/test/rules/other.yml @@ -87,6 +87,12 @@ ? HD : other: HD +? UHD +? Ultra +? UltraHD +? Ultra HD +: other: UltraHD + ? mHD # ?? : other: mHD diff --git a/guessit/test/rules/video_codec.yml b/guessit/test/rules/video_codec.yml index d195eaa..a11991e 100644 --- a/guessit/test/rules/video_codec.yml +++ b/guessit/test/rules/video_codec.yml @@ -49,6 +49,11 @@ ? -x264 : video_codec: h265 +? hevc10 +? HEVC-YUV420P10 +: video_codec: h265 + video_profile: 10bit + ? h265-HP : video_codec: h265 video_profile: HP \ No newline at end of file