Fix issues related to str/unicode in python 2.7

This commit is contained in:
Toilal
2015-11-06 22:20:21 +01:00
parent 1ae6ca07f5
commit e02060c04f
39 changed files with 145 additions and 41 deletions
+4
View File
@@ -14,8 +14,12 @@ Example::
>>> guessit('Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi')
MatchesDict([('title', 'Treme'), ('season', 1), ('episodeNumber', 3), ('episodeTitle', 'Right Place, Wrong Time'), ('format', 'HDTV'), ('videoCodec', 'XviD'), ('releaseGroup', 'NoTV'), ('container', 'avi'), ('mimetype', 'video/x-msvideo'), ('type', 'episode')])
``MatchesDict`` is a dict that keeps matches ordering.
Command line options can be given as a dict or a string in the second argument.
GuessIt only accepts unicode strings, so you need to use the ``u`` prefix for input strings on Python 2.
Properties
----------
For ``episode`` type, some properties have been renamed
+16 -9
View File
@@ -73,11 +73,10 @@ Usage
guessit can be used from the command line::
$ guessit
usage: __main__.py [-h] [-n] [-Y] [-D] [-L ALLOWED_LANGUAGES]
[-C ALLOWED_COUNTRIES] [-E] [-T EXPECTED_TITLE] [-v]
[-P SHOW_PROPERTY] [-u] [-a] [-j] [-y] [-f INPUT_FILE]
[--version]
[filename [filename ...]]
usage: -c [-h] [-n] [-Y] [-D] [-L ALLOWED_LANGUAGES] [-C ALLOWED_COUNTRIES]
[-E] [-T EXPECTED_TITLE] [-f INPUT_FILE] [-v] [-P SHOW_PROPERTY]
[-a] [-j] [-y] [--version]
[filename [filename ...]]
positional arguments:
filename Filename or release name to guess
@@ -86,6 +85,8 @@ guessit can be use from command line::
-h, --help show this help message and exit
Naming:
-n, --name-only Parse files as name only, considering "/" and "\" like
other separators.
-Y, --date-year-first
If short date is found, consider the first digits as
the year.
@@ -102,20 +103,22 @@ guessit can be use from command line::
-T EXPECTED_TITLE, --expected-title EXPECTED_TITLE
Expected title to parse (can be used multiple times)
Input:
-f INPUT_FILE, --input-file INPUT_FILE
Read filenames from an input text file. File should
use UTF-8 charset.
Output:
-v, --verbose Display debug output
-P SHOW_PROPERTY, --show-property SHOW_PROPERTY
Display the value of a single property (title, series,
videoCodec, year, ...)
-u, --unidentified Display the unidentified parts.
-a, --advanced Display advanced information for filename guesses, as
json output
-j, --json Display information for filename guesses as json
output
-y, --yaml Display information for filename guesses as yaml
output (like unit-test)
-f INPUT_FILE, --input-file INPUT_FILE
Read filenames from an input file.
output
Information:
--version Display the guessit version.
@@ -128,6 +131,10 @@ It can also be used as a python module::
``MatchesDict`` is a dict that keeps matches ordering.
Command line options can be given as a dict or a string in the second argument.
GuessIt only accepts unicode strings, so you need to use the ``u`` prefix for input strings on Python 2.
Support
-------
+15 -16
View File
@@ -4,20 +4,23 @@
Entry point module
"""
# pragma: no cover
from __future__ import print_function
from __future__ import print_function, unicode_literals
from collections import OrderedDict
import os
import logging
import json
import sys
import six
from rebulk.match import Match
import six
from guessit.__version__ import __version__
from guessit.options import argument_parser
from guessit.api import guessit
from io import open #pylint:disable=redefined-builtin
class GuessitEncoder(json.JSONEncoder):
"""
JSON Encoder for guessit response
@@ -73,20 +76,12 @@ def main(args=None): # pylint:disable=too-many-branches
"""
Main function for entry point
"""
if six.PY2: # pragma: no cover
import codecs
import locale
import sys
if six.PY2 and os.name == 'nt': # pragma: no cover
# see http://bugs.python.org/issue2128
if os.name == 'nt':
for i, j in enumerate(sys.argv):
sys.argv[i] = j.decode(locale.getpreferredencoding())
import locale
# see https://github.com/wackou/guessit/issues/43
# and http://stackoverflow.com/questions/4545661/unicodedecodeerror-when-redirecting-to-file
# Wrap sys.stdout into a StreamWriter to allow writing unicode.
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
for i, j in enumerate(sys.argv):
sys.argv[i] = j.decode(locale.getpreferredencoding())
if args is None: # pragma: no cover
options = argument_parser.parse_args()
@@ -121,9 +116,13 @@ def main(args=None): # pylint:disable=too-many-branches
filenames = []
if options.filename:
filenames.extend(options.filename)
for filename in options.filename:
encoding = sys.getfilesystemencoding()
if not isinstance(filename, six.text_type):
filename = filename.decode(encoding)
filenames.append(filename)
if options.input_file:
input_file = open(options.input_file, 'r')
input_file = open(options.input_file, 'r', encoding='utf-8')
try:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
+5
View File
@@ -3,6 +3,10 @@
"""
API functions that can be used by external software
"""
from __future__ import unicode_literals
import six
from .rules import REBULK
from .options import parse_options
@@ -18,5 +22,6 @@ def guessit(string, options=None):
:return:
:rtype:
"""
assert isinstance(string, six.text_type), "guessit input must be %s." % six.text_type.__name__
options = parse_options(options)
return REBULK.matches(string, options).to_dict(options.get('advanced', False))
+3 -2
View File
@@ -3,6 +3,7 @@
"""
Options
"""
from __future__ import unicode_literals
from argparse import ArgumentParser
import shlex
@@ -21,7 +22,7 @@ def build_argument_parser():
naming_opts = opts.add_argument_group("Naming")
naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=False,
help='Parse files as name only (consider "/" and "\\" like other separators)')
help='Parse files as name only, considering "/" and "\\" like other separators.')
naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
help='If short date is found, consider the first digits as the year.')
naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
@@ -39,7 +40,7 @@ def build_argument_parser():
input_opts = opts.add_argument_group("Input")
input_opts.add_argument('-f', '--input-file', dest='input_file', default=False,
help='Read filenames from an input file.')
help='Read filenames from an input text file. File should use UTF-8 charset.')
output_opts = opts.add_argument_group("Output")
output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=False,
+2
View File
@@ -3,6 +3,8 @@
"""
Rebulk configuration for guessit
"""
from __future__ import unicode_literals
from rebulk import Rebulk
from .markers.path import PATH_MARKER
+5 -5
View File
@@ -3,11 +3,11 @@
"""
Common module
"""
import six
from __future__ import unicode_literals
seps = six.u(r' [](){}+*|&=§-_~#/\.,;:') # list of tags/words separators
seps = r' [](){}+*|&=§-_~#/\.,;:' # list of tags/words separators
title_seps = six.u(r'-+/\|;') # separators for title
title_seps = r'-+/\|;' # separators for title
dash = (six.u(r'-'), six.u(r'[\W_]')) # abbreviation used by many rebulk objects.
alt_dash = (six.u(r'@'), six.u(r'[\W_]')) # abbreviation used by many rebulk objects.
dash = (r'-', r'[\W_]') # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'[\W_]') # abbreviation used by many rebulk objects.
+2
View File
@@ -3,6 +3,8 @@
"""
Comparators
"""
from __future__ import unicode_literals
from functools import cmp_to_key
+1 -2
View File
@@ -3,8 +3,7 @@
"""
Date
"""
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
import regex as re
+1
View File
@@ -3,6 +3,7 @@
"""
Formatters
"""
from __future__ import unicode_literals
from . import seps
import regex as re
+1 -2
View File
@@ -3,8 +3,7 @@
"""
parse numeral from various formats
"""
from __future__ import absolute_import, division, print_function, unicode_literals
from __future__ import unicode_literals
import regex as re
+1
View File
@@ -3,6 +3,7 @@
"""
Validators
"""
from __future__ import unicode_literals
from functools import partial
+2
View File
@@ -3,6 +3,8 @@
"""
Words utils
"""
from __future__ import unicode_literals
import regex as re
_words_rexp = re.compile(r'\w+', re.UNICODE)
+1
View File
@@ -3,6 +3,7 @@
"""
Groups markers (...), [...] and {...}
"""
from __future__ import unicode_literals
from rebulk import Rebulk
+1
View File
@@ -3,6 +3,7 @@
"""
Path markers
"""
from __future__ import unicode_literals
from rebulk import Rebulk
+2
View File
@@ -3,6 +3,8 @@
"""
Processors
"""
from __future__ import unicode_literals
from collections import defaultdict
import copy
from rebulk import Rebulk
+2
View File
@@ -3,6 +3,8 @@
"""
audioCodec and audioProfile property
"""
from __future__ import unicode_literals
from rebulk import Rebulk, Rule, RemoveMatch
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
Bonus support
"""
from __future__ import unicode_literals
import regex as re
from guessit.rules.properties.title import TitleFromPosition
from rebulk import Rebulk, AppendMatch, Rule
+2
View File
@@ -3,6 +3,8 @@
"""
cd properties
"""
from __future__ import unicode_literals
from rebulk import Rebulk
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
Container support
"""
from __future__ import unicode_literals
from rebulk import Rebulk
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
crc and idNumber properties
"""
from __future__ import unicode_literals
from rebulk import Rebulk
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
Date and year
"""
from __future__ import unicode_literals
from rebulk import Rebulk, RemoveMatch, Rule
from ..common.date import search_date, valid_year
+2
View File
@@ -3,6 +3,8 @@
"""
edition property
"""
from __future__ import unicode_literals
from rebulk import Rebulk
import regex as re
@@ -3,6 +3,8 @@
"""
Episode title
"""
from __future__ import unicode_literals
from collections import defaultdict
from guessit.rules.common import seps, title_seps
from guessit.rules.properties.title import TitleFromPosition, TitleBaseRule
+2
View File
@@ -3,6 +3,8 @@
"""
Season/Episode numbering support
"""
from __future__ import unicode_literals
from collections import defaultdict
import copy
+2
View File
@@ -3,6 +3,8 @@
"""
Film support
"""
from __future__ import unicode_literals
import regex as re
from rebulk import Rebulk, AppendMatch, Rule
+2
View File
@@ -3,6 +3,8 @@
"""
format property
"""
from __future__ import unicode_literals
from rebulk import Rebulk, RemoveMatch, Rule
import regex as re
+1
View File
@@ -5,6 +5,7 @@ Language and subtitleLanguage
"""
# pylint: disable=no-member
from __future__ import unicode_literals
import copy
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
mimetype property
"""
from __future__ import unicode_literals
import mimetypes
from rebulk.match import Match
+2
View File
@@ -3,6 +3,8 @@
"""
format property
"""
from __future__ import unicode_literals
import copy
from rebulk import Rebulk, Rule, RemoveMatch
+2
View File
@@ -3,6 +3,8 @@
"""
Part support
"""
from __future__ import unicode_literals
import regex as re
from rebulk import Rebulk
@@ -3,6 +3,8 @@
"""
Release group
"""
from __future__ import unicode_literals
import copy
from guessit.rules.common.validators import int_coercable
from guessit.rules.properties.title import TitleFromPosition
+2
View File
@@ -3,6 +3,8 @@
"""
screenSize property
"""
from __future__ import unicode_literals
from rebulk import Rebulk, Rule, RemoveMatch
import regex as re
+2
View File
@@ -3,6 +3,8 @@
"""
Title
"""
from __future__ import unicode_literals
from guessit.rules.properties.film import FilmTitleRule
from guessit.rules.properties.language import SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, \
SubtitleExtensionRule
+2
View File
@@ -3,6 +3,8 @@
"""
screenSize property
"""
from __future__ import unicode_literals
from rebulk.match import Match
+2
View File
@@ -3,6 +3,8 @@
"""
videoCodec and videoProfile property
"""
from __future__ import unicode_literals
from rebulk import Rebulk, Rule, RemoveMatch
import regex as re
+1
View File
@@ -3,6 +3,7 @@
"""
Website property.
"""
from __future__ import unicode_literals
from pkg_resources import resource_stream # @UnresolvedImport
+26
View File
@@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
import os
import pytest
from ..api import guessit
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
def test_default():
    """A plain ASCII release name given as unicode yields a non-empty guess with a title."""
    filename = u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'
    guess = guessit(filename)
    # The result must be truthy and expose a 'title' key.
    assert guess
    assert 'title' in guess
def test_unicode():
    """A release name containing non-ASCII characters yields a non-empty guess with a title."""
    filename = u'[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi'
    guess = guessit(filename)
    # The result must be truthy and expose a 'title' key.
    assert guess
    assert 'title' in guess
def test_main_non_unicode():
    """Passing a byte string to guessit must raise AssertionError."""
    raw_name = b'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'
    with pytest.raises(AssertionError):
        guessit(raw_name)
+17 -5
View File
@@ -15,24 +15,36 @@ def test_main_no_args():
main([])
def test_main():
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'])
def test_main_unicode():
main([u'[阿维达].Avida.2006.FRENCH.DVDRiP.XViD-PROD.avi'])
def test_main_non_unicode():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv'])
def test_main_verbose():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--verbose'])
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--verbose'])
def test_main_yaml():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--yaml'])
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--yaml'])
def test_main_json():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--json'])
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '--json'])
def test_main_show_property():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-P', 'title'])
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-P', 'title'])
def test_main_advanced():
main(['Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-a'])
main([u'Fear.and.Loathing.in.Las.Vegas.FRENCH.ENGLISH.720p.HDDVD.DTS.x264-ESiR.mkv', '-a'])
def test_main_input():