From 0bda2a73bd83d1e0b4358acdef4449e3edb41d59 Mon Sep 17 00:00:00 2001 From: Rato Date: Thu, 20 Jul 2017 22:23:09 +0200 Subject: [PATCH 1/3] Fix for #481: Enhance output value --- HISTORY.rst | 1 + docs/properties.rst | 4 ++- guessit/__init__.py | 1 + guessit/rules/common/quantity.py | 55 ++++++++++++++++++++++++++++++++ guessit/rules/properties/size.py | 10 ++---- 5 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 guessit/rules/common/quantity.py diff --git a/HISTORY.rst b/HISTORY.rst index ebbec41..e30aa9e 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -97,6 +97,7 @@ History - Added default and configurable list of allowed languages and countries - Added `VC-1` as new `video_codec` possible value - Enhanced dash-separated `release_group` detection. +- Changed `size` output to return `guessit.Quantity` object. 2.1.4 (2017-06-01) ------------------ diff --git a/docs/properties.rst b/docs/properties.rst index a5c07ea..2c58dae 100644 --- a/docs/properties.rst +++ b/docs/properties.rst @@ -265,7 +265,9 @@ Other properties - **size** - Size (MB, GB, TB). + Size (MB, GB, TB). Examples: ``1.2GB`` (``<Quantity [1.2GB]>``), ``430MB`` (``<Quantity [430MB]>``). + + - ``[<quantity>]`` (object has ``magnitude`` and ``units``) - **edition** diff --git a/guessit/__init__.py b/guessit/__init__.py index 22e9dbb..4b519c9 100644 --- a/guessit/__init__.py +++ b/guessit/__init__.py @@ -5,5 +5,6 @@ Extracts as much information as possible from a video file. """ from .api import guessit, GuessItApi from .options import ConfigurationException +from .rules.common.quantity import Quantity from .__version__ import __version__ diff --git a/guessit/rules/common/quantity.py b/guessit/rules/common/quantity.py new file mode 100644 index 0000000..57614af --- /dev/null +++ b/guessit/rules/common/quantity.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Quantity +""" +import re + +import six + + +class Quantity(object): + """ + Represent a quantity object with magnitude and units. 
+ """ + + parser_re = re.compile(r'(?P\d+(?:[.]\d+)?)(?P[^\d]+)') + + def __init__(self, magnitude, units): + self.magnitude = magnitude + self.units = units + + @classmethod + def fromstring(cls, string): + """ + Parse the string into a quantity object. + :param string: + :return: + """ + values = cls.parser_re.match(string).groupdict() + try: + magnitude = int(values['magnitude']) + except ValueError: + magnitude = float(values['magnitude']) + units = values['units'].upper() + + return Quantity(magnitude, units) + + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, six.string_types): + return str(self) == other + if not isinstance(other, Quantity): + return NotImplemented + return self.magnitude == other.magnitude and self.units == other.units + + def __ne__(self, other): + return not self == other + + def __repr__(self): + return ''.format(self) + + def __str__(self): + return '{0}{1}'.format(self.magnitude, self.units) diff --git a/guessit/rules/properties/size.py b/guessit/rules/properties/size.py index 84f0303..2657dd2 100644 --- a/guessit/rules/properties/size.py +++ b/guessit/rules/properties/size.py @@ -7,8 +7,9 @@ import re from rebulk import Rebulk -from ..common.validators import seps_surround from ..common import dash +from ..common.quantity import Quantity +from ..common.validators import seps_surround def size(): @@ -17,13 +18,8 @@ def size(): :return: Created Rebulk object :rtype: Rebulk """ - - def format_size(value): - """Format size using uppercase and no space.""" - return re.sub(r'(?<=\d)[.](?=[^\d])', '', value.upper()) - rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='size', validator=seps_surround) - rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=format_size, tags=['release-group-prefix']) + rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=Quantity.fromstring, tags=['release-group-prefix']) return rebulk From 
ed6db27ddcd5f0fcd7a21d95c4bf44814f2ed4aa Mon Sep 17 00:00:00 2001 From: Rato Date: Sun, 23 Jul 2017 10:05:30 +0200 Subject: [PATCH 2/3] Using class Size instead of Quantity. --- HISTORY.rst | 1 + docs/properties.rst | 4 ++-- guessit/__init__.py | 2 +- guessit/rules/common/quantity.py | 18 ++++++++++++++---- guessit/rules/properties/size.py | 4 ++-- 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index e30aa9e..37b012f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -98,6 +98,7 @@ History - Added `VC-1` as new `video_codec` possible value - Enhanced dash-separated `release_group` detection. - Changed `size` output to return `guessit.Quantity` object. +- Changed `size` output to return `guessit.Size` object. 2.1.4 (2017-06-01) ------------------ diff --git a/docs/properties.rst b/docs/properties.rst index 2c58dae..c506bc5 100644 --- a/docs/properties.rst +++ b/docs/properties.rst @@ -265,9 +265,9 @@ Other properties - **size** - Size (MB, GB, TB). Examples: ``1.2GB`` (``<Quantity [1.2GB]>``), ``430MB`` (``<Quantity [430MB]>``). + Size (MB, GB, TB). Examples: ``1.2GB`` (``<Size [1.2GB]>``), ``430MB`` (``<Size [430MB]>``). - - ``[<quantity>]`` (object has ``magnitude`` and ``units``) + - ``[<size>]`` (object has ``magnitude`` and ``units``) - **edition** diff --git a/guessit/__init__.py b/guessit/__init__.py index 4b519c9..365935e 100644 --- a/guessit/__init__.py +++ b/guessit/__init__.py @@ -5,6 +5,6 @@ Extracts as much information as possible from a video file. 
""" from .api import guessit, GuessItApi from .options import ConfigurationException -from .rules.common.quantity import Quantity +from .rules.common.quantity import Size from .__version__ import __version__ diff --git a/guessit/rules/common/quantity.py b/guessit/rules/common/quantity.py index 57614af..57e455e 100644 --- a/guessit/rules/common/quantity.py +++ b/guessit/rules/common/quantity.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ -Quantity +Quantities: Size """ import re @@ -33,7 +33,7 @@ class Quantity(object): magnitude = float(values['magnitude']) units = values['units'].upper() - return Quantity(magnitude, units) + return cls(magnitude, units) def __hash__(self): return hash(str(self)) @@ -41,7 +41,7 @@ class Quantity(object): def __eq__(self, other): if isinstance(other, six.string_types): return str(self) == other - if not isinstance(other, Quantity): + if not isinstance(other, self.__class__): return NotImplemented return self.magnitude == other.magnitude and self.units == other.units @@ -49,7 +49,17 @@ class Quantity(object): return not self == other def __repr__(self): - return ''.format(self) + return '<{0} [{1}]>'.format(self.__class__.__name__, self) def __str__(self): return '{0}{1}'.format(self.magnitude, self.units) + + +class Size(Quantity): + """ + Represent size. 
+ + e.g.: 1.1GB, 300MB + """ + + pass diff --git a/guessit/rules/properties/size.py b/guessit/rules/properties/size.py index 2657dd2..fc4ef3f 100644 --- a/guessit/rules/properties/size.py +++ b/guessit/rules/properties/size.py @@ -8,7 +8,7 @@ import re from rebulk import Rebulk from ..common import dash -from ..common.quantity import Quantity +from ..common.quantity import Size from ..common.validators import seps_surround @@ -20,6 +20,6 @@ def size(): """ rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='size', validator=seps_surround) - rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=Quantity.fromstring, tags=['release-group-prefix']) + rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix']) return rebulk From 5505f74a995bafdfab8330e97eee9624fffd9e1e Mon Sep 17 00:00:00 2001 From: Rato Date: Sun, 23 Jul 2017 15:41:00 +0200 Subject: [PATCH 3/3] Adding bit rate detection. Fixes #251 and #477 --- HISTORY.rst | 2 ++ docs/properties.rst | 14 ++++++++ guessit/rules/__init__.py | 2 ++ guessit/rules/common/quantity.py | 29 +++++++++++++++-- guessit/rules/properties/bit_rate.py | 48 ++++++++++++++++++++++++++++ guessit/rules/properties/size.py | 2 +- guessit/test/episodes.yml | 11 +++++++ guessit/test/various.yml | 38 ++++++++++++++++++++++ 8 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 guessit/rules/properties/bit_rate.py diff --git a/HISTORY.rst b/HISTORY.rst index 37b012f..04fa8f0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -99,6 +99,8 @@ History - Enhanced dash-separated `release_group` detection. - Changed `size` output to return `guessit.Quantity` object. - Changed `size` output to return `guessit.Size` object. +- Added `audio_bit_rate` as new possible property. +- Added `video_bit_rate` as new possible property. 
2.1.4 (2017-06-01) ------------------ diff --git a/docs/properties.rst b/docs/properties.rst index c506bc5..d5c39a1 100644 --- a/docs/properties.rst +++ b/docs/properties.rst @@ -180,6 +180,13 @@ Video properties - ``DXVA`` +- **video_bit_rate** + + Video bit rate (Mbps). Examples: ``25Mbps`` (``<BitRate [25Mbps]>``), ``40Mbps`` (``<BitRate [40Mbps]>``). + + - ``[<bit_rate>]`` (object has ``magnitude`` and ``units``) + + Audio properties ---------------- @@ -206,6 +213,13 @@ Audio properties ``Master Audio`` +- **audio_bit_rate** + + Audio bit rate (Kbps, Mbps). Examples: ``448Kbps`` (``<BitRate [448Kbps]>``), ``1.5Mbps`` (``<BitRate [1.5Mbps]>``). + + - ``[<bit_rate>]`` (object has ``magnitude`` and ``units``) + + Localization properties ----------------------- diff --git a/guessit/rules/__init__.py b/guessit/rules/__init__.py index 6f34e05..d56cd90 100644 --- a/guessit/rules/__init__.py +++ b/guessit/rules/__init__.py @@ -24,6 +24,7 @@ from .properties.release_group import release_group from .properties.streaming_service import streaming_service from .properties.other import other from .properties.size import size +from .properties.bit_rate import bit_rate from .properties.edition import edition from .properties.cds import cds from .properties.bonus import bonus @@ -63,6 +64,7 @@ def rebulk_builder(): rebulk.rebulk(streaming_service()) rebulk.rebulk(other()) rebulk.rebulk(size()) + rebulk.rebulk(bit_rate()) rebulk.rebulk(edition()) rebulk.rebulk(cds()) rebulk.rebulk(bonus()) diff --git a/guessit/rules/common/quantity.py b/guessit/rules/common/quantity.py index 57e455e..f2b11a1 100644 --- a/guessit/rules/common/quantity.py +++ b/guessit/rules/common/quantity.py @@ -4,9 +4,12 @@ Quantities: Size """ import re +from abc import abstractmethod import six + +from ..common import seps + class Quantity(object): """ @@ -19,6 +22,14 @@ class Quantity(object): self.magnitude = magnitude self.units = units + @classmethod + @abstractmethod + def parse_units(cls, value): + """ + Parse a string to a proper unit notation. 
+ """ + raise NotImplementedError + @classmethod def fromstring(cls, string): """ @@ -31,7 +42,7 @@ class Quantity(object): magnitude = int(values['magnitude']) except ValueError: magnitude = float(values['magnitude']) - units = values['units'].upper() + units = cls.parse_units(values['units']) return cls(magnitude, units) @@ -62,4 +73,18 @@ class Size(Quantity): e.g.: 1.1GB, 300MB """ - pass + @classmethod + def parse_units(cls, value): + return value.strip(seps).upper() + + +class BitRate(Quantity): + """ + Represent bit rate. + + e.g.: 320Kbps, 1.5Mbps + """ + + @classmethod + def parse_units(cls, value): + return value.strip(seps).capitalize() diff --git a/guessit/rules/properties/bit_rate.py b/guessit/rules/properties/bit_rate.py new file mode 100644 index 0000000..7648ff2 --- /dev/null +++ b/guessit/rules/properties/bit_rate.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +video_bit_rate and audio_bit_rate properties +""" +import re + +from rebulk import Rebulk +from rebulk.rules import Rule, RenameMatch + +from ..common import dash, seps +from ..common.quantity import BitRate +from ..common.validators import seps_surround + + +def bit_rate(): + """ + Builder for rebulk object. + :return: Created Rebulk object + :rtype: Rebulk + """ + rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) + rebulk.defaults(name='audio_bit_rate', validator=seps_surround) + rebulk.regex(r'\d+-?[kmg]bps', r'\d+\.\d+-?[kmg]bps', + conflict_solver=( + lambda match, other: match + if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags + else other + ), + formatter=BitRate.fromstring, tags=['release-group-prefix']) + + rebulk.rules(BitRateTypeRule) + + return rebulk + + +class BitRateTypeRule(Rule): + """ + Convert audio bit rate guess into video bit rate. 
+ """ + consequence = RenameMatch('video_bit_rate') + + def when(self, matches, context): + for match in matches.named('audio_bit_rate'): + previous = matches.previous(match, index=0, + predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec')) + if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)): + yield match diff --git a/guessit/rules/properties/size.py b/guessit/rules/properties/size.py index fc4ef3f..e997aa1 100644 --- a/guessit/rules/properties/size.py +++ b/guessit/rules/properties/size.py @@ -20,6 +20,6 @@ def size(): """ rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) rebulk.defaults(name='size', validator=seps_surround) - rebulk.regex(r'\d+\.?[mgt]b', r'\d+\.\d+[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix']) + rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix']) return rebulk diff --git a/guessit/test/episodes.yml b/guessit/test/episodes.yml index f4e7f05..9f86287 100644 --- a/guessit/test/episodes.yml +++ b/guessit/test/episodes.yml @@ -4338,3 +4338,14 @@ episode_title: AKA Sin Bin container: mkv type: episode + +? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS +: title: Hotel Hell + season: 1 + episode: 1 + screen_size: 720p + audio_codec: Dolby Digital + audio_channels: '5.1' + audio_bit_rate: 448Kbps + release_group: ALANiS + type: episode diff --git a/guessit/test/various.yml b/guessit/test/various.yml index b40722f..a267183 100644 --- a/guessit/test/various.yml +++ b/guessit/test/various.yml @@ -864,3 +864,41 @@ video_codec: H.264 release_group: SDH type: episode + +? 
Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts +: title: Katy Perry + alternative_title: Pepsi & Billboard Summer Beats Concert + year: 2012 + screen_size: 1080i + source: HDTV + video_bit_rate: 20Mbps + audio_codec: Dolby Digital + audio_channels: '2.0' + video_codec: MPEG-2 + release_group: TrollHD + container: ts + +? Justin Timberlake - MTV Video Music Awards 2013 1080i 32 Mbps DTS-HD 5.1.ts +: title: Justin Timberlake + alternative_title: MTV Video Music Awards + year: 2013 + screen_size: 1080i + video_bit_rate: 32Mbps + audio_codec: DTS-HD + audio_channels: '5.1' + container: ts + type: movie + +? Chuck Berry The Very Best Of Chuck Berry(2010)[320 Kbps] +: title: Chuck Berry The Very Best Of Chuck Berry + year: 2010 + audio_bit_rate: 320Kbps + type: movie + +? Title Name [480p][1.5Mbps][.mp4] +: title: Title Name + screen_size: 480p + video_bit_rate: 1.5Mbps + container: mp4 + type: movie +