mirror of
https://github.com/zoriya/guessit.git
synced 2026-06-07 12:15:13 +00:00
Fix issues related to str/unicode in python 2.7
This commit is contained in:
@@ -14,8 +14,12 @@ Example::
|
||||
>>> guessit('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi')
|
||||
MatchesDict([('title', 'Treme'), ('season', 1), ('episodeNumber', 3), ('episodeTitle', 'Right Place, Wrong Time'), ('format', 'HDTV'), ('videoCodec', 'XviD'), ('releaseGroup', 'NoTV'), ('container', 'avi'), ('mimetype', 'video/x-msvideo'), ('type', 'episode')])
|
||||
|
||||
``MatchesDict`` is a dict that keeps matches ordering.
|
||||
|
||||
Command line options can be given as dict or string to the second argument.
|
||||
|
||||
GuessIt only accepts unicode strings, so you need to use the ``u`` prefix for input strings on Python 2.
|
||||
|
||||
Properties
|
||||
----------
|
||||
For ``episode`` type, some properties have been renamed
|
||||
|
||||
+16
-9
@@ -73,11 +73,10 @@ Usage
|
||||
guessit can be used from the command line::
|
||||
|
||||
$ guessit
|
||||
usage: __main__.py [-h] [-n] [-Y] [-D] [-L ALLOWED_LANGUAGES]
|
||||
[-C ALLOWED_COUNTRIES] [-E] [-T EXPECTED_TITLE] [-v]
|
||||
[-P SHOW_PROPERTY] [-u] [-a] [-j] [-y] [-f INPUT_FILE]
|
||||
[--version]
|
||||
[filename [filename ...]]
|
||||
usage: -c [-h] [-n] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-C ALLOWED_COUNTRIES]
|
||||
[-E] [-T EXPECTED_TITLE] [-f INPUT_FILE] [-v] [-P SHOW_PROPERTY]
|
||||
[-a] [-j] [-y] [--version]
|
||||
[filename [filename ...]]
|
||||
|
||||
positional arguments:
|
||||
filename Filename or release name to guess
|
||||
@@ -86,6 +85,8 @@ guessit can be use from command line::
|
||||
-h, --help show this help message and exit
|
||||
|
||||
Naming:
|
||||
-n, --name-only Parse files as name only, considering "/" and "\" like
|
||||
other separators.
|
||||
-Y, --date-year-first
|
||||
If short date is found, consider the first digits as
|
||||
the year.
|
||||
@@ -102,20 +103,22 @@ guessit can be use from command line::
|
||||
-T EXPECTED_TITLE, --expected-title EXPECTED_TITLE
|
||||
Expected title to parse (can be used multiple times)
|
||||
|
||||
Input:
|
||||
-f INPUT_FILE, --input-file INPUT_FILE
|
||||
Read filenames from an input text file. File should
|
||||
use UTF-8 charset.
|
||||
|
||||
Output:
|
||||
-v, --verbose Display debug output
|
||||
-P SHOW_PROPERTY, --show-property SHOW_PROPERTY
|
||||
Display the value of a single property (title, series,
|
||||
videoCodec, year, ...)
|
||||
-u, --unidentified Display the unidentified parts.
|
||||
-a, --advanced Display advanced information for filename guesses, as
|
||||
json output
|
||||
-j, --json Display information for filename guesses as json
|
||||
output
|
||||
-y, --yaml Display information for filename guesses as yaml
|
||||
output (like unit-test)
|
||||
-f INPUT_FILE, --input-file INPUT_FILE
|
||||
Read filenames from an input file.
|
||||
output
|
||||
|
||||
Information:
|
||||
--version Display the guessit version.
|
||||
@@ -128,6 +131,10 @@ It can also be used as a python module::
|
||||
|
||||
``MatchesDict`` is a dict that keeps matches ordering.
|
||||
|
||||
Command line options can be given as dict or string to the second argument.
|
||||
|
||||
GuessIt only accepts unicode strings, so you need to use the ``u`` prefix for input strings on Python 2.
|
||||
|
||||
|
||||
Support
|
||||
-------
|
||||
|
||||
+15
-16
@@ -4,20 +4,23 @@
|
||||
Entry point module
|
||||
"""
|
||||
# pragma: no cover
|
||||
from __future__ import print_function
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
import sys
|
||||
import six
|
||||
|
||||
from rebulk.match import Match
|
||||
import six
|
||||
|
||||
from guessit.__version__ import __version__
|
||||
from guessit.options import argument_parser
|
||||
from guessit.api import guessit
|
||||
|
||||
from io import open #pylint:disable=redefined-builtin
|
||||
|
||||
class GuessitEncoder(json.JSONEncoder):
|
||||
"""
|
||||
JSON Encoder for guessit response
|
||||
@@ -73,20 +76,12 @@ def main(args=None): # pylint:disable=too-many-branches
|
||||
"""
|
||||
Main function for entry point
|
||||
"""
|
||||
if six.PY2: # pragma: no cover
|
||||
import codecs
|
||||
import locale
|
||||
import sys
|
||||
|
||||
if six.PY2 and os.name == 'nt': # pragma: no cover
|
||||
# see http://bugs.python.org/issue2128
|
||||
if os.name == 'nt':
|
||||
for i, j in enumerate(sys.argv):
|
||||
sys.argv[i] = j.decode(locale.getpreferredencoding())
|
||||
import locale
|
||||
|
||||
# see https://github.com/wackou/guessit/issues/43
|
||||
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
|
||||
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
|
||||
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
|
||||
for i, j in enumerate(sys.argv):
|
||||
sys.argv[i] = j.decode(locale.getpreferredencoding())
|
||||
|
||||
if args is None: # pragma: no cover
|
||||
options = argument_parser.parse_args()
|
||||
@@ -121,9 +116,13 @@ def main(args=None): # pylint:disable=too-many-branches
|
||||
|
||||
filenames = []
|
||||
if options.filename:
|
||||
filenames.extend(options.filename)
|
||||
for filename in options.filename:
|
||||
encoding = sys.getfilesystemencoding()
|
||||
if not isinstance(filename, six.text_type):
|
||||
filename = filename.decode(encoding)
|
||||
filenames.append(filename)
|
||||
if options.input_file:
|
||||
input_file = open(options.input_file, 'r')
|
||||
input_file = open(options.input_file, 'r', encoding='utf-8')
|
||||
try:
|
||||
filenames.extend([line.strip() for line in input_file.readlines()])
|
||||
finally:
|
||||
|
||||
@@ -3,6 +3,10 @@
|
||||
"""
|
||||
API functions that can be used by external software
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import six
|
||||
|
||||
from .rules import REBULK
|
||||
|
||||
from .options import parse_options
|
||||
@@ -18,5 +22,6 @@ def guessit(string, options=None):
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
assert isinstance(string, six.text_type), "guessit input must be %s." % six.text_type.__name__
|
||||
options = parse_options(options)
|
||||
return REBULK.matches(string, options).to_dict(options.get('advanced', False))
|
||||
|
||||
+3
-2
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Options
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import shlex
|
||||
@@ -21,7 +22,7 @@ def build_argument_parser():
|
||||
|
||||
naming_opts = opts.add_argument_group("Naming")
|
||||
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
|
||||
help='Parse files as name only (consider "/" and "\\" like other separators)')
|
||||
help='Parse files as name only, considering "/" and "\\" like other separators.')
|
||||
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
|
||||
help='If short date is found, consider the first digits as the year.')
|
||||
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
|
||||
@@ -39,7 +40,7 @@ def build_argument_parser():
|
||||
|
||||
input_opts = opts.add_argument_group("Input")
|
||||
input_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
|
||||
help='Read filenames from an input file.')
|
||||
help='Read filenames from an input text file. File should use UTF-8 charset.')
|
||||
|
||||
output_opts = opts.add_argument_group("Output")
|
||||
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Rebulk configuration for guessit
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
from .markers.path import PATH_MARKER
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
"""
|
||||
Common module
|
||||
"""
|
||||
import six
|
||||
from __future__ import unicode_literals
|
||||
|
||||
seps = six.u(r' [](){}+*|&=§-_~#/\.,;:') # list of tags/words separators
|
||||
seps = r' [](){}+*|&=§-_~#/\.,;:' # list of tags/words separators
|
||||
|
||||
title_seps = six.u(r'-+/\|;') # separators for title
|
||||
title_seps = r'-+/\|;' # separators for title
|
||||
|
||||
dash = (six.u(r'-'), six.u(r'[\W_]')) # abbreviation used by many rebulk objects.
|
||||
alt_dash = (six.u(r'@'), six.u(r'[\W_]')) # abbreviation used by many rebulk objects.
|
||||
dash = (r'-', r'[\W_]') # abbreviation used by many rebulk objects.
|
||||
alt_dash = (r'@', r'[\W_]') # abbreviation used by many rebulk objects.
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Comparators
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from functools import cmp_to_key
|
||||
|
||||
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
"""
|
||||
Date
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Formatters
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from . import seps
|
||||
import regex as re
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
"""
|
||||
parse numeral from various formats
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Validators
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from functools import partial
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Words utils
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
|
||||
_words_rexp = re.compile(r'\w+', re.UNICODE)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Groups markers (...), [...] and {...}
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Path markers
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Processors
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import defaultdict
|
||||
import copy
|
||||
from rebulk import Rebulk
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
audioCodec and audioProfile property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Bonus support
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
from guessit.rules.properties.title import TitleFromPosition
|
||||
from rebulk import Rebulk, AppendMatch, Rule
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
cd properties
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Container support
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
import regex as re
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
crc and idNumber properties
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Date and year
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk, RemoveMatch, Rule
|
||||
|
||||
from ..common.date import search_date, valid_year
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
edition property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Episode title
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import defaultdict
|
||||
from guessit.rules.common import seps, title_seps
|
||||
from guessit.rules.properties.title import TitleFromPosition, TitleBaseRule
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Season/Episode numbering support
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from collections import defaultdict
|
||||
import copy
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Film support
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
from rebulk import Rebulk, AppendMatch, Rule
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
format property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk, RemoveMatch, Rule
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ Language and subtitleLanguage
|
||||
"""
|
||||
# pylint: disable=no-member
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import copy
|
||||
|
||||
import regex as re
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
mimetype property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import mimetypes
|
||||
|
||||
from rebulk.match import Match
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
format property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import copy
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Part support
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import regex as re
|
||||
from rebulk import Rebulk
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Release group
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import copy
|
||||
from guessit.rules.common.validators import int_coercable
|
||||
from guessit.rules.properties.title import TitleFromPosition
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
screenSize property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
Title
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from guessit.rules.properties.film import FilmTitleRule
|
||||
from guessit.rules.properties.language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, \
|
||||
SubtitleExtensionRule
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
screenSize property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk.match import Match
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
videoCodec and videoProfile property
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
import regex as re
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
Website property.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from pkg_resources import resource_stream # @UnresolvedImport
|
||||
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from ..api import guessit
|
||||
|
||||
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
|
||||
|
||||
|
||||
def test_default():
|
||||
ret = guessit(u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
|
||||
assert ret and 'title' in ret
|
||||
|
||||
|
||||
def test_unicode():
|
||||
ret = guessit(u'[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi')
|
||||
assert ret and 'title' in ret
|
||||
|
||||
|
||||
def test_main_non_unicode():
|
||||
with pytest.raises(AssertionError):
|
||||
guessit(b'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv')
|
||||
@@ -15,24 +15,36 @@ def test_main_no_args():
|
||||
main([])
|
||||
|
||||
|
||||
def test_main():
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'])
|
||||
|
||||
|
||||
def test_main_unicode():
|
||||
main([u'[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi'])
|
||||
|
||||
|
||||
def test_main_non_unicode():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'])
|
||||
|
||||
|
||||
def test_main_verbose():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--verbose'])
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--verbose'])
|
||||
|
||||
|
||||
def test_main_yaml():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--yaml'])
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--yaml'])
|
||||
|
||||
|
||||
def test_main_json():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--json'])
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--json'])
|
||||
|
||||
|
||||
def test_main_show_property():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-P', 'title'])
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-P', 'title'])
|
||||
|
||||
|
||||
def test_main_advanced():
|
||||
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-a'])
|
||||
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-a'])
|
||||
|
||||
|
||||
def test_main_input():
|
||||
|
||||
Reference in New Issue
Block a user