Merge pull request #419 from guessit-io/feature/detection-improvements

Detection improvements: format, video_profile, audio_channels, streaming_service, UltraHD
This commit is contained in:
Rato
2017-02-28 22:43:04 +01:00
committed by GitHub
15 changed files with 266 additions and 25 deletions
+2 -2
View File
@@ -120,7 +120,7 @@ Video properties
Format of the initial source
- ``TV`` ``HDTV`` ``AHDTV`` ``HDTC`` ``SATRip`` ``WEB-DL`` ``VOD`` ``BluRay`` ``DVD`` ``WEBRip`` ``Workprint`` ``Telecine`` ``VHS`` ``DVB`` ``Telesync`` ``HD-DVD`` ``PPV`` ``Cam``
- ``TV`` ``HDTV`` ``UHDTV`` ``AHDTV`` ``HDTC`` ``SATRip`` ``WEB-DL`` ``VOD`` ``BluRay`` ``DVD`` ``WEBRip`` ``Workprint`` ``Telecine`` ``VHS`` ``DVB`` ``Telesync`` ``HD-DVD`` ``PPV`` ``Cam``
- **screen_size**
@@ -253,7 +253,7 @@ Other properties
Other property will appear under this property.
- ``Fansub``, ``HR``, ``HQ``, ``Screener``, ``Unrated``, ``HD``, ``3D``, ``SyncFix``, ``Bonus``,
- ``Fansub``, ``HR``, ``HQ``, ``Screener``, ``Unrated``, ``HD``, ``UltraHD``, ``3D``, ``SyncFix``, ``Bonus``,
``WideScreen``, ``Fastsub``, ``R5``, ``AudioFix``, ``DDC``, ``Trailer``, ``Complete``, ``Limited``, ``Classic``,
``Proper``, ``DualAudio``, ``LiNE``, ``LD``, ``MD``, ``XXX``, ``Remastered``, ``Extended``, ``Extended Cut``,
``Uncut``, ``Retail``, ``ReEncoded``, ``Mux``, ``Hardcoded Subtitles``
+23 -1
View File
@@ -58,12 +58,15 @@ def audio_codec():
rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
rebulk.string('7ch', '8ch', value='7.1')
rebulk.string('5ch', '6ch', value='5.1')
rebulk.string('2ch', 'stereo', value='2.0')
rebulk.string('1ch', 'mono', value='1.0')
rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)
rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)
return rebulk
@@ -162,3 +165,22 @@ class HqConflictRule(Rule):
if hq_other:
return hq_other
class AudioChannelsValidatorRule(Rule):
    """
    Discard weakly detected audio channels that are not preceded by an audio codec.

    Only matches tagged ``weak-audio_channels`` are examined; every other
    audio channel match is left untouched by this rule.
    """
    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the weak audio channel matches that have to be removed.

        :param matches: the current matches collection
        :param context: processing context (not used by this rule)
        :return: list of weak audio channel matches for which no
            ``audio_codec`` match is found in the one-character range
            ending at the channel start
        """
        def is_audio_codec(candidate):
            return candidate.name == 'audio_codec'

        # A weak channel is kept only when an audio codec match covers the
        # position right before it.
        return [channel
                for channel in matches.tagged('weak-audio_channels')
                if not matches.range(channel.start - 1, channel.start, is_audio_codec)]
+4 -2
View File
@@ -58,10 +58,12 @@ def episodes():
:return:
"""
if match.name == 'episode' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year']:
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date', 'year'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name == 'season' and other.name in \
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']:
['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date'] \
and 'weak-audio_channels' not in other.tags:
return match
if match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
and match.initiator != other.initiator:
+5 -2
View File
@@ -17,7 +17,7 @@ def format_():
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name="format", tags='video-codec-prefix')
rebulk.defaults(name="format", tags=['video-codec-prefix', 'streaming_service.suffix'])
rebulk.regex("VHS", "VHS-?Rip", value="VHS")
rebulk.regex("CAM", "CAM-?Rip", "HD-?CAM", value="Cam")
@@ -31,13 +31,16 @@ def format_():
rebulk.regex("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", # "DVD-?R(?:$|^E)" => DVD-Real ...
"DVD-?9", "DVD-?5", value="DVD")
rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV")
rebulk.regex("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP", value="HDTV",
conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
rebulk.regex("VOD", "VOD-?Rip", value="VOD")
rebulk.regex("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap", value="WEBRip")
rebulk.regex("WEB-?DL", "WEB-?HD", "WEB", "DL-?WEB", "DL(?=-?Mux)", value="WEB-DL")
rebulk.regex("HD-?DVD-?Rip", "HD-?DVD", value="HD-DVD")
rebulk.regex("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50", value="BluRay")
rebulk.regex("AHDTV", value="AHDTV")
rebulk.regex('UHD-?TV', 'UHD-?Rip', value='UHDTV',
conflict_solver=lambda match, other: other if other.name == 'other' else '__default__')
rebulk.regex("HDTC", value="HDTC")
rebulk.regex("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip", value="SATRip")
+41 -3
View File
@@ -10,7 +10,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.validators import seps_after, seps_surround, compose
from ..common.validators import seps_after, seps_before, seps_surround, compose
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@@ -63,10 +63,15 @@ def other():
rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
for value in (
'Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'Screener', 'Remux', 'Remastered', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
'CC', 'LD', 'MD', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('LDTV', value='LD')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
for value in ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
'Extended', 'Extended Cut'):
@@ -82,7 +87,7 @@ def other():
rebulk.string('HC', value='Hardcoded Subtitles')
rebulk.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore, ValidateScreenerRule,
ValidateMuxRule, ValidateHardcodedSubs, ProperCountRule)
ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor, ProperCountRule)
return rebulk
@@ -227,3 +232,36 @@ class ValidateHardcodedSubs(Rule):
to_remove.append(hc_match)
return to_remove
class ValidateStreamingServiceNeighbor(Rule):
    """Validate streaming service's neighbors."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the ``other`` matches glued to text that is not a streaming service.

        Only ``other`` matches tagged ``streaming_service.prefix`` or
        ``streaming_service.suffix`` are examined.

        :param matches: the current matches collection
        :param context: processing context (not used by this rule)
        :return: list of ``other`` matches to remove
        """
        def is_candidate(found):
            return ('streaming_service.prefix' in found.tags or
                    'streaming_service.suffix' in found.tags)

        def is_service(found):
            return found.name == 'streaming_service'

        def has_content(hole):
            # Truthy when the hole holds something other than separators.
            return hole.value.strip(seps)

        rejected = []
        for current in matches.named('other', predicate=is_candidate):
            if not seps_after(current):
                # No separator after the match: acceptable only when it is a
                # prefix immediately followed by a streaming service.
                if 'streaming_service.prefix' in current.tags:
                    following = matches.next(current, is_service, 0)
                    if following and not matches.holes(current.end, following.start,
                                                       predicate=has_content):
                        continue
                rejected.append(current)
            elif not seps_before(current):
                # No separator before the match: acceptable only when it is a
                # suffix immediately preceded by a streaming service.
                if 'streaming_service.suffix' in current.tags:
                    preceding = matches.previous(current, is_service, 0)
                    if preceding and not matches.holes(preceding.end, current.start,
                                                       predicate=has_content):
                        continue
                rejected.append(current)

        return rejected
+2 -1
View File
@@ -29,7 +29,7 @@ def screen_size():
return other
return '__default__'
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
@@ -45,6 +45,7 @@ def screen_size():
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
rebulk.string('4k', value='4K')
_digits_re = re.compile(r'\d+')
+15 -9
View File
@@ -9,7 +9,6 @@ from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch
from ...rules.common import seps, dash
from ...rules.common.validators import seps_surround
def streaming_service():
@@ -19,7 +18,7 @@ def streaming_service():
:rtype: Rebulk
"""
rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', validator=seps_surround)
rebulk.defaults(name='streaming_service', tags=['format-prefix'])
rebulk.string('AE', 'A&E', value='A&E')
rebulk.string('AMBC', value='ABC')
@@ -55,6 +54,7 @@ def streaming_service():
rebulk.string('NFL', value='NFL')
rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
rebulk.string('NF', 'Netflix', value='Netflix')
rebulk.string('iTunes', value='iTunes')
rebulk.string('RTE', value='RTÉ One')
rebulk.string('SESO', 'SeeSo', value='SeeSo')
rebulk.string('SPKE', 'SpikeTV', 'Spike TV', value='Spike TV')
@@ -89,14 +89,20 @@ class ValidateStreamingService(Rule):
"""
to_remove = []
for service in matches.named('streaming_service'):
next_match = matches.next(service, predicate=lambda match: match.name == 'format', index=0)
if next_match and not matches.holes(service.end, next_match.start,
predicate=lambda match: match.value.strip(seps)):
if service.value == 'Comedy Central':
# Current match is a valid streaming service, removing invalid closed caption (CC) matches
to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC'))
next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
has_other = service.initiator and service.initiator.children.named('other')
if not has_other and \
(not next_match or matches.holes(service.end, next_match.start,
predicate=lambda match: match.value.strip(seps))) and \
(not previous_match or matches.holes(previous_match.end, service.start,
predicate=lambda match: match.value.strip(seps))):
to_remove.append(service)
continue
to_remove.append(service)
if service.value == 'Comedy Central':
# Current match is a valid streaming service, removing invalid closed caption (CC) matches
to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC'))
return to_remove
+6 -3
View File
@@ -18,7 +18,7 @@ def video_codec():
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="video_codec", tags='format-suffix')
rebulk.defaults(name="video_codec", tags=['format-suffix', 'streaming_service.suffix'])
rebulk.regex(r"Rv\d{2}", value="Real")
rebulk.regex("Mpeg2", value="Mpeg2")
@@ -26,12 +26,14 @@ def video_codec():
rebulk.regex("XviD", value="XviD")
rebulk.regex("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVC(?:HD)?", value="h264")
rebulk.regex("[hx]-?265(?:-?HEVC)?", "HEVC", value="h265")
rebulk.regex('(?P<video_codec>hevc)(?P<video_profile>10)', value={'video_codec': 'h265', 'video_profile': '10bit'},
tags=['video-codec-suffix'], children=True)
# http://blog.mediacoderhq.com/h264-profiles-and-levels/
# http://fr.wikipedia.org/wiki/H.264
rebulk.defaults(name="video_profile", validator=seps_surround)
rebulk.regex('10.?bits?', 'Hi10P?', value='10bit')
rebulk.regex('10.?bits?', 'Hi10P?', 'YUV420P10', value='10bit')
rebulk.regex('8.?bits?', value='8bit')
rebulk.string('BP', value='BP', tags='video_profile.rule')
@@ -62,7 +64,8 @@ class ValidateVideoCodec(Rule):
not matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags):
ret.append(codec)
continue
if not seps_after(codec):
if not seps_after(codec) and \
not matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags):
ret.append(codec)
continue
return ret
+74
View File
@@ -3575,3 +3575,77 @@
language: und
crc32: EDA6E7F1
type: episode
# NetflixUHD
? Show.Name.S01E06.NetflixUHD
: title: Show Name
season: 1
episode: 6
streaming_service: Netflix
other: UltraHD
type: episode
? Show.Name.S04E13.FINAL.MULTI.DD51.2160p.NetflixUHDRip.x265-TVS
: title: Show Name
season: 4
episode: 13
other: FINAL
language: mul
audio_codec: DolbyDigital
audio_channels: '5.1'
screen_size: 4K
streaming_service: Netflix
format: UHDTV
video_codec: h265
release_group: TVS
type: episode
? Show.Name.S06E11.Of.Late.I.Think.of.Rosewood.iTunesHD.x264
: title: Show Name
season: 6
episode: 11
episode_title: Of Late I Think of Rosewood
streaming_service: iTunes
other: HD
video_codec: h264
type: episode
? Show.Name.S01.720p.iTunes.h264-Group
: title: Show Name
season: 1
screen_size: 720p
streaming_service: iTunes
video_codec: h264
release_group: Group
type: episode
? Show.Name.1x01.eps1.0.hellofriend.(HDiTunes.Ac3.Esp).(2015).By.Malaguita.avi
: title: Show Name
season: 1
episode: 1
episode_title: eps1 0 hellofriend
other: HD
streaming_service: iTunes
audio_codec: AC3
language: spa
year: 2015
container: avi
type: episode
? "[Hanamaru&LoliHouse] The Dragon Dentist - 01 [WebRip 1920x1080 HEVC-yuv420p10 AAC].mkv"
: release_group: Hanamaru&LoliHouse
title: The Dragon Dentist
episode: 1
format: WEBRip
screen_size: 1080p
video_codec: h265
video_profile: 10bit
audio_codec: AAC
container: mkv
type: episode
? Show Name - Season 1 Episode 50
: title: Show Name
season: 1
episode: 50
type: episode
+47 -1
View File
@@ -855,4 +855,50 @@
? Mad Max Beyond Thunderdome ()
: title: Mad Max Beyond Thunderdome
type: movie
type: movie
? Hacksaw Ridge 2016 Multi 2160p UHD BluRay Hevc10 HDR10 DTSHD & ATMOS 7.1 -DDR.mkv
: title: Hacksaw Ridge
year: 2016
language: mul
screen_size: 4K
other: UltraHD
format: BluRay
video_codec: h265
video_profile: 10bit
audio_codec: [DTS, DolbyAtmos]
audio_profile: HD
audio_channels: '7.1'
release_group: DDR
container: mkv
type: movie
? Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4
: title: Special Correspondents
year: 2016
language: [it, en]
screen_size: 4K
streaming_service: Netflix
other: UltraHD
release_group: TeamPremium
container: mp4
type: movie
? -Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4
: alternative_title: 4K
? -Special.Correspondents.2016.iTA.ENG.4K.2160p.NetflixUHD.TeamPremium.mp4
: alternative_title: 2160p
? Suicide Squad EXTENDED (2016) 2160p 4K UltraHD Blu-Ray x265 (HEVC 10bit BT709) Dolby Atmos 7.1 -DDR
: title: Suicide Squad
other: [Extended, UltraHD]
year: 2016
screen_size: 4K
format: BluRay
video_codec: h265
video_profile: 10bit
audio_codec: DolbyAtmos
audio_channels: '7.1'
release_group: DDR
type: movie
+11
View File
@@ -36,6 +36,12 @@
? +trueHD
: audio_codec: TrueHD
? +True-HD51
? +trueHD51
: audio_codec: TrueHD
audio_channels: '5.1'
? +DTS-HD
: audio_codec: DTS
audio_profile: HD
@@ -57,6 +63,7 @@
audio_profile: LC
? +AAC2.0
? +AAC20
: audio_codec: AAC
audio_channels: '2.0'
@@ -80,5 +87,9 @@
: audio_channels: '1.0'
? DD5.1
? DD51
: audio_codec: DolbyDigital
audio_channels: '5.1'
? -51
: audio_channels: '5.1'
+21 -1
View File
@@ -249,4 +249,24 @@
? epi
: options: -t episode
title: epi
title: epi
? Episode20
? Episode 20
: episode: 20
? Episode50
? Episode 50
: episode: 50
? Episode51
? Episode 51
: episode: 51
? Episode70
? Episode 70
: episode: 70
? Episode71
? Episode 71
: episode: 71
+4
View File
@@ -132,3 +132,7 @@
? HDTC
: format: HDTC
? UHDTV
? UHDRip
: format: UHDTV
+6
View File
@@ -87,6 +87,12 @@
? HD
: other: HD
? UHD
? Ultra
? UltraHD
? Ultra HD
: other: UltraHD
? mHD # ??
: other: mHD
+5
View File
@@ -49,6 +49,11 @@
? -x264
: video_codec: h265
? hevc10
? HEVC-YUV420P10
: video_codec: h265
video_profile: 10bit
? h265-HP
: video_codec: h265
video_profile: HP