Document tools
This commit is contained in:
parent
f4ab698aba
commit
3a86d75dfa
|
@ -10,12 +10,17 @@ For Python 3.6+
|
|||
* Ver [documentación](https://gitlab.com/mauriciobaeza/zaz/wikis/inicio)
|
||||
|
||||
|
||||
### Software libre, not gratis
|
||||
|
||||
|
||||
This extension has a maintenance cost of 1 euro per year.
|
||||
|
||||
BCH: `1RPLWHJW34p7pMQV1ft4x7eWhAYw69Dsb`
|
||||
|
||||
BTC: `3Fe4JuADrAK8Qs7GDAxbSXR8E54avwZJLW`
|
||||
|
||||
PayPal :( donate ATT elmau DOT net
|
||||
|
||||
|
||||
## Extensions developed with ZAZ
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from .fernet import *
|
|
@ -8,7 +8,7 @@ import hmac
|
|||
import time
|
||||
import os
|
||||
import struct
|
||||
from pyaes import AESModeOfOperationCBC, Encrypter, Decrypter
|
||||
from .pyaes import AESModeOfOperationCBC, Encrypter, Decrypter
|
||||
|
||||
|
||||
__all__ = [
|
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from .peewee import *
|
|
@ -0,0 +1,70 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import easymacro as app
|
||||
from .peewee import (
|
||||
__exception_wrapper__,
|
||||
AutoField,
|
||||
Database,
|
||||
DateField,
|
||||
Entity,
|
||||
NodeList,
|
||||
SQL,
|
||||
)
|
||||
|
||||
|
||||
class FirebirdAutoField(AutoField):
    """AutoField whose column DDL carries an identity clause.

    The text in ``extra`` is placed in the column definition right after
    the data type (and the optional ``UNINDEXED`` marker).
    """

    extra = 'GENERATED BY DEFAULT AS IDENTITY'

    def ddl(self, ctx):
        """Return the ``NodeList`` describing this column's definition.

        ``ctx`` is handed to ``ddl_datatype`` unchanged. Every other piece
        is included only when the matching field attribute is truthy
        (``NOT NULL`` is included when ``null`` is falsy).
        """
        pieces = [Entity(self.column_name)]

        column_type = self.ddl_datatype(ctx)
        if column_type:
            pieces.append(column_type)

        # Fixed-text modifiers, listed in the order they are emitted.
        modifiers = (
            (self.unindexed, 'UNINDEXED'),
            (self.extra, self.extra),
            (self.primary_key, 'PRIMARY KEY'),
            (not self.null, 'NOT NULL'),
        )
        pieces.extend(SQL(text) for wanted, text in modifiers if wanted)

        if self.sequence:
            pieces.append(SQL("DEFAULT NEXTVAL('%s')" % self.sequence))
        if self.constraints:
            pieces.extend(self.constraints)
        if self.collation:
            pieces.append(SQL('COLLATE %s' % self.collation))

        return NodeList(pieces)
|
||||
|
||||
|
||||
class FirebirdDateField(DateField):
    """DateField that converts values through the ``easymacro`` helpers."""

    def db_value(self, value):
        """Convert ``value`` with ``app.date_to_struct`` before storing it."""
        converted = app.date_to_struct(value)
        return converted

    def python_value(self, value):
        """Convert a value read from the database with ``app._to_date``."""
        converted = app._to_date(value)
        return converted
|
||||
|
||||
|
||||
class LOBaseDatabase(Database):
    """``Database`` subclass that runs statements via ``app.get_db``."""

    def __init__(self, database, **kwargs):
        """Initialise the base class and start without a database handle."""
        super().__init__(database, **kwargs)
        # Replaced by the object returned from app.get_db() in _connect().
        self._db = None

    def _connect(self):
        """Open ``self.database`` with ``app.get_db`` and keep the handle."""
        handle = app.get_db(self.database)
        self._db = handle
        return handle

    def execute_sql(self, sql, params=None, commit=True):
        """Execute ``sql`` with ``params`` on the stored handle.

        ``commit`` is accepted but not used by this implementation.
        """
        with __exception_wrapper__:
            result = self._db.execute(sql, params)
        return result

    def last_insert_id(self, cursor, query_type=None):
        """Return the constant ``1``; ``cursor`` is not inspected."""
        # ~ app.mri(cursor)
        return 1
|
||||
|
||||
# ~ def get_tables(self):
|
||||
# ~ res = self.execute('SHOW TABLES;')
|
||||
# ~ return [r[0] for r in res.fetchall()]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,104 @@
|
|||
from pg8000.core import (
|
||||
Warning, DataError, DatabaseError, InterfaceError, ProgrammingError,
|
||||
Error, OperationalError, IntegrityError, InternalError, NotSupportedError,
|
||||
ArrayContentNotHomogenousError, ArrayDimensionsNotConsistentError,
|
||||
ArrayContentNotSupportedError, Connection, Cursor, Binary, Date,
|
||||
DateFromTicks, Time, TimeFromTicks, Timestamp, TimestampFromTicks, BINARY,
|
||||
Interval, PGEnum, PGJson, PGJsonb, PGTsvector, PGText, PGVarchar)
|
||||
from ._version import get_versions
|
||||
__version__ = get_versions()['version']
|
||||
del get_versions
|
||||
|
||||
# Copyright (c) 2007-2009, Mathieu Fenniak
|
||||
# Copyright (c) The Contributors
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
__author__ = "Mathieu Fenniak"
|
||||
|
||||
|
||||
def connect(
        user, host='localhost', unix_sock=None, port=5432, database=None,
        password=None, ssl=None, timeout=None, application_name=None,
        max_prepared_statements=1000, tcp_keepalive=True):
    """Create and return a ``Connection`` built from the given settings.

    Every argument is forwarded to ``Connection`` positionally, in the
    order in which it is declared here.
    """
    settings = (
        user, host, unix_sock, port, database, password, ssl, timeout,
        application_name, max_prepared_statements, tcp_keepalive,
    )
    return Connection(*settings)
|
||||
|
||||
|
||||
apilevel = "2.0"
|
||||
"""The DBAPI level supported, currently "2.0".
|
||||
|
||||
This property is part of the `DBAPI 2.0 specification
|
||||
<http://www.python.org/dev/peps/pep-0249/>`_.
|
||||
"""
|
||||
|
||||
threadsafety = 1
|
||||
"""Integer constant stating the level of thread safety the DBAPI interface
|
||||
supports. This DBAPI module supports sharing of the module only. Connections
|
||||
and cursors may not be shared between threads. This gives pg8000 a threadsafety
|
||||
value of 1.
|
||||
|
||||
This property is part of the `DBAPI 2.0 specification
|
||||
<http://www.python.org/dev/peps/pep-0249/>`_.
|
||||
"""
|
||||
|
||||
paramstyle = 'format'
|
||||
|
||||
max_prepared_statements = 1000
|
||||
|
||||
# I have no idea what this would be used for by a client app. Should it be
|
||||
# TEXT, VARCHAR, CHAR? It will only compare against row_description's
|
||||
# type_code if it is this one type. It is the varchar type oid for now, this
|
||||
# appears to match expectations in the DB API 2.0 compliance test suite.
|
||||
|
||||
STRING = 1043
|
||||
"""String type oid."""
|
||||
|
||||
|
||||
NUMBER = 1700
|
||||
"""Numeric type oid"""
|
||||
|
||||
DATETIME = 1114
|
||||
"""Timestamp type oid"""
|
||||
|
||||
ROWID = 26
|
||||
"""ROWID type oid"""
|
||||
|
||||
# ``__all__`` must list public names as strings. Listing the objects
# themselves makes ``from pg8000 import *`` fail with a TypeError.
__all__ = [
    "Warning", "DataError", "DatabaseError", "connect", "InterfaceError",
    "ProgrammingError", "Error", "OperationalError", "IntegrityError",
    "InternalError", "NotSupportedError", "ArrayContentNotHomogenousError",
    "ArrayDimensionsNotConsistentError", "ArrayContentNotSupportedError",
    "Connection", "Cursor", "Binary", "Date", "DateFromTicks", "Time",
    "TimeFromTicks", "Timestamp", "TimestampFromTicks", "BINARY", "Interval",
    "PGEnum", "PGJson", "PGJsonb", "PGTsvector", "PGText", "PGVarchar",
]
|
||||
|
||||
"""Version string for pg8000.
|
||||
|
||||
.. versionadded:: 1.9.11
|
||||
"""
|
|
@ -0,0 +1,460 @@
|
|||
|
||||
# This file helps to compute a version number in source trees obtained from
|
||||
# git-archive tarball (such as those provided by githubs download-from-tag
|
||||
# feature). Distribution tarballs (built by setup.py sdist) and build
|
||||
# directories (produced by setup.py build) will contain a much shorter file
|
||||
# that just contains the computed version number.
|
||||
|
||||
# This file is released into the public domain. Generated by
|
||||
# versioneer-0.15 (https://github.com/warner/python-versioneer)
|
||||
|
||||
import errno
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def get_keywords():
    """Return the git-archive keyword strings.

    The result maps ``"refnames"`` and ``"full"`` to the two placeholder
    strings assigned below.
    """
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    return {"refnames": git_refnames, "full": git_full}
|
||||
|
||||
|
||||
class VersioneerConfig:
    """Plain attribute container; ``get_config`` fills in its fields."""
|
||||
|
||||
|
||||
def get_config():
    """Create a ``VersioneerConfig`` and populate it with fixed settings."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    settings = VersioneerConfig()
    settings.VCS = "git"
    settings.style = "pep440"
    settings.tag_prefix = ""
    settings.parentdir_prefix = "pg8000-"
    settings.versionfile_source = "pg8000/_version.py"
    settings.verbose = False
    return settings
|
||||
|
||||
|
||||
class NotThisMethod(Exception):
    """Raised when a version-lookup strategy cannot be used."""
|
||||
|
||||
|
||||
LONG_VERSION_PY = {}
|
||||
HANDLERS = {}
|
||||
|
||||
|
||||
def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that records a function in ``HANDLERS``.

    The decorated function is stored as ``HANDLERS[vcs][method]`` and is
    returned unchanged.
    """
    def decorate(f):
        # Create the per-VCS table on first use, then file the handler.
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
|
||||
|
||||
|
||||
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
    """Run the first launchable executable from ``commands`` with ``args``.

    Each entry of ``commands`` is tried in turn until one can be started.
    Returns the stripped standard output (decoded on Python 3) when the
    process exits with status 0. Returns ``None`` when no command could be
    started or when the process exits with a non-zero status.
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        argv = [executable] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(argv, cwd=cwd, stdout=subprocess.PIPE,
                                       stderr=stderr_target)
        except EnvironmentError as error:
            if error.errno == errno.ENOENT:
                # This candidate does not exist; try the next one.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(error)
            return None
        break
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None

    stdout = process.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
        return None
    return stdout
|
||||
|
||||
|
||||
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of the directory ``root``.

    Source tarballs conventionally unpack into a directory that includes
    both the project name and a version string. When the last component of
    ``root`` starts with ``parentdir_prefix``, the remainder of that name
    is returned as the version; otherwise ``NotThisMethod`` is raised.
    """
    dirname = os.path.basename(root)
    if dirname.startswith(parentdir_prefix):
        return {"version": dirname[len(parentdir_prefix):],
                "full-revisionid": None,
                "dirty": False, "error": None}
    if verbose:
        print("guessing rootdir is '%s', but '%s' doesn't start with "
              "prefix '%s'" % (root, dirname, parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
|
||||
|
||||
|
||||
@register_vcs_handler("git", "get_keywords")
|
||||
def git_get_keywords(versionfile_abs):
    """Extract the git keyword values from the file ``versionfile_abs``.

    Returns a dict that may contain ``"refnames"`` and ``"full"``, taken
    from the quoted values on the ``git_refnames =`` and ``git_full =``
    lines. If the file cannot be opened or read, whatever was collected so
    far (possibly nothing) is returned.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        # The context manager closes the file even if reading fails midway.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                if stripped.startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if stripped.startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
    except EnvironmentError:
        # Deliberately best-effort: an unreadable file yields no keywords.
        pass
    return keywords
|
||||
|
||||
|
||||
@register_vcs_handler("git", "keywords")
|
||||
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Compute version information from expanded git-archive keywords.

    Raises ``NotThisMethod`` when ``keywords`` is empty or its refnames are
    still the unexpanded ``$Format`` placeholder. Otherwise returns a dict
    with ``version``, ``full-revisionid``, ``dirty`` and ``error``.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {name.strip() for name in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {name[len(TAG):] for name in refs if name.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {name for name in refs if re.search(r'\d', name)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs-tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if not ref.startswith(tag_prefix):
            continue
        version = ref[len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
        return {"version": version,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False, "error": None}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags"}
|
||||
|
||||
|
||||
@register_vcs_handler("git", "pieces_from_vcs")
|
||||
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Collect version "pieces" by running git inside ``root``.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Raises ``NotThisMethod`` when ``root`` has no ``.git`` entry or when
    ``git describe`` / ``git rev-parse`` produce no output. Otherwise
    returns a dict with ``long``, ``short``, ``error`` and ``dirty``, plus
    ``closest-tag`` and ``distance`` unless ``error`` was set.
    """
    if not os.path.exists(os.path.join(root, ".git")):
        if verbose:
            print("no .git in %s" % root)
        raise NotThisMethod("no .git directory")

    GITS = ["git.cmd", "git.exe"] if sys.platform == "win32" else ["git"]

    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
    # if there are no tags, this yields HEX[-dirty] (no NUM)
    # (--long was added in git-1.5.5)
    describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
                                      "--always", "--long"],
                               cwd=root)
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()

    full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {
        "long": full_out,
        "short": full_out[:7],  # maybe improved later
        "error": None,
    }

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX
    if "-" not in git_describe:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits
        return pieces

    # TAG-NUM-gHEX
    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
    if not mo:
        # unparseable. Maybe git-describe is misbehaving?
        pieces["error"] = ("unable to parse git-describe output: '%s'"
                           % describe_out)
        return pieces

    # tag
    full_tag = mo.group(1)
    if not full_tag.startswith(tag_prefix):
        if verbose:
            fmt = "tag '%s' doesn't start with prefix '%s'"
            print(fmt % (full_tag, tag_prefix))
        pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                           % (full_tag, tag_prefix))
        return pieces
    pieces["closest-tag"] = full_tag[len(tag_prefix):]

    # distance: number of commits since tag
    pieces["distance"] = int(mo.group(2))

    # commit: short hex revision ID
    pieces["short"] = mo.group(3)

    return pieces
|
||||
|
||||
|
||||
def plus_or_dot(pieces):
    """Return the separator to use before appending a local version part.

    Returns ``"."`` when the closest tag already contains a ``"+"`` and
    ``"+"`` otherwise. A missing or ``None`` ``"closest-tag"`` entry is
    treated as an empty tag.
    """
    # ``closest-tag`` can be present with the value None, in which case
    # ``.get(key, "")`` would still return None and ``in`` would raise.
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"
|
||||
|
||||
|
||||
def render_pep440(pieces):
    """Render ``pieces`` as TAG[+DISTANCE.gHEX[.dirty]].

    This builds a version string with a post-release "local version
    identifier". Note that if you get a tagged build and then dirty it,
    you'll get TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. The result is 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
        return rendered

    rendered = tag
    if pieces["distance"] or pieces["dirty"]:
        rendered += plus_or_dot(pieces)
        rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered
|
||||
|
||||
|
||||
def render_pep440_pre(pieces):
    """Render ``pieces`` as TAG[.post.devDISTANCE], ignoring dirtiness.

    Exceptions:
    1: no tags. The result is 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return tag + ".post.dev%d" % pieces["distance"]
    return tag
|
||||
|
||||
|
||||
def render_pep440_post(pieces):
    """Render ``pieces`` as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree
    will appear "older" than the corresponding clean one), but you
    shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]+gHEX
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
        return rendered

    rendered = tag
    if pieces["distance"] or pieces["dirty"]:
        rendered += ".post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += plus_or_dot(pieces)
        rendered += "g%s" % pieces["short"]
    return rendered
|
||||
|
||||
|
||||
def render_pep440_old(pieces):
    """Render ``pieces`` as TAG[.postDISTANCE[.dev0]].

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    dev_suffix = ".dev0" if pieces["dirty"] else ""
    if not tag:
        # exception #1
        return "0.post%d" % pieces["distance"] + dev_suffix
    if pieces["distance"] or pieces["dirty"]:
        return tag + ".post%d" % pieces["distance"] + dev_suffix
    return tag
|
||||
|
||||
|
||||
def render_git_describe(pieces):
    """Render ``pieces`` as TAG[-DISTANCE-gHEX][-dirty].

    This mirrors 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = pieces["short"]
    elif pieces["distance"]:
        rendered = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        rendered = tag
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered
|
||||
|
||||
|
||||
def render_git_describe_long(pieces):
    """Render ``pieces`` as TAG-DISTANCE-gHEX[-dirty].

    This mirrors 'git describe --tags --dirty --always --long'. The
    distance/hash is unconditional.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered
|
||||
|
||||
|
||||
def render(pieces, style):
    """Render ``pieces`` into a version dict using the requested ``style``.

    When ``pieces["error"]`` is set, the version is reported as
    ``"unknown"`` together with that error. A falsy ``style`` or
    ``"default"`` selects ``"pep440"``. An unrecognised style raises
    ``ValueError``.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"]}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = (
        ("pep440", render_pep440),
        ("pep440-pre", render_pep440_pre),
        ("pep440-post", render_pep440_post),
        ("pep440-old", render_pep440_old),
        ("git-describe", render_git_describe),
        ("git-describe-long", render_git_describe_long),
    )
    for style_name, renderer in renderers:
        if style == style_name:
            rendered = renderer(pieces)
            break
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None}
|
||||
|
||||
|
||||
def get_versions():
    """Return the version dict, trying each lookup strategy in turn.

    The order is: expanded git-archive keywords, ``git`` run in the source
    tree, then the parent directory name. If none applies, a dict with
    version ``"0+unknown"`` and an error message is returned.
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def unknown(reason):
        # Shared shape of both "could not determine a version" results.
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": reason}

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return unknown("unable to find root of source tree")

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return unknown("unable to compute version")
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
|||
from ..scramp.core import ScramClient, ScramServer, ScramException
|
||||
|
||||
# ``__all__`` must list names as strings; listing the classes themselves
# makes ``from <package> import *`` fail with a TypeError.
__all__ = ["ScramClient", "ScramServer", "ScramException"]
|
|
@ -0,0 +1,520 @@
|
|||
|
||||
# This file helps to compute a version number in source trees obtained from
|
||||
# git-archive tarball (such as those provided by githubs download-from-tag
|
||||
# feature). Distribution tarballs (built by setup.py sdist) and build
|
||||
# directories (produced by setup.py build) will contain a much shorter file
|
||||
# that just contains the computed version number.
|
||||
|
||||
# This file is released into the public domain. Generated by
|
||||
# versioneer-0.18 (https://github.com/warner/python-versioneer)
|
||||
|
||||
"""Git implementation of _version.py."""
|
||||
|
||||
import errno
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def get_keywords():
    """Return the git-archive keyword strings used for version lookup.

    The result maps ``"refnames"``, ``"full"`` and ``"date"`` to the three
    placeholder strings assigned below.
    """
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    git_date = "$Format:%ci$"
    return {"refnames": git_refnames, "full": git_full, "date": git_date}
|
||||
|
||||
|
||||
class VersioneerConfig:
    """Plain attribute holder for the Versioneer settings.

    ``get_config`` creates an instance and assigns its fields.
    """
|
||||
|
||||
|
||||
def get_config():
    """Create a ``VersioneerConfig``, fill in its settings and return it."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    settings = VersioneerConfig()
    settings.VCS = "git"
    settings.style = "pep440"
    settings.tag_prefix = ""
    settings.parentdir_prefix = "scramp-"
    settings.versionfile_source = "scramp/_version.py"
    settings.verbose = False
    return settings
|
||||
|
||||
|
||||
class NotThisMethod(Exception):
    """Signals that a version-lookup method does not apply here."""
|
||||
|
||||
|
||||
LONG_VERSION_PY = {}
|
||||
HANDLERS = {}
|
||||
|
||||
|
||||
def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that files a function under ``HANDLERS``.

    The decorated function is stored as ``HANDLERS[vcs][method]`` and is
    handed back unchanged.
    """
    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        # Create the per-VCS table on first use, then file the handler.
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
|
||||
|
||||
|
||||
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Run the first launchable executable from ``commands`` with ``args``.

    Returns a ``(stdout, returncode)`` pair. ``stdout`` is the stripped
    standard output (decoded on Python 3) when the process exits with
    status 0, and ``None`` otherwise. When no command could be started the
    result is ``(None, None)``.
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        argv = [executable] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(argv, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=stderr_target)
        except EnvironmentError as error:
            if error.errno == errno.ENOENT:
                # This candidate does not exist; try the next one.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(error)
            return None, None
        break
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    stdout = process.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode
|
||||
|
||||
|
||||
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from a parent directory name.

    Source tarballs conventionally unpack into a directory that includes
    both the project name and a version string. ``root`` and the two
    directories above it are checked in turn; the first whose name starts
    with ``parentdir_prefix`` supplies the version. ``NotThisMethod`` is
    raised when none of them matches.
    """
    tried = []
    candidate = root
    for _ in range(3):
        dirname = os.path.basename(candidate)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
|
||||
|
||||
|
||||
@register_vcs_handler("git", "get_keywords")
|
||||
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    Returns a dict that may contain ``"refnames"``, ``"full"`` and
    ``"date"``, taken from the quoted values on the ``git_refnames =``,
    ``git_full =`` and ``git_date =`` lines. If the file cannot be opened
    or read, whatever was collected so far (possibly nothing) is returned.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        # The context manager closes the file even if reading fails midway.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                if stripped.startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if stripped.startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
                if stripped.startswith("git_date ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["date"] = mo.group(1)
    except EnvironmentError:
        # Deliberately best-effort: an unreadable file yields no keywords.
        pass
    return keywords
|
||||
|
||||
|
||||
@register_vcs_handler("git", "keywords")
|
||||
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Raises ``NotThisMethod`` when ``keywords`` is empty or its refnames are
    still the unexpanded ``$Format`` placeholder. Otherwise returns a dict
    with ``version``, ``full-revisionid``, ``dirty``, ``error`` and
    ``date``.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {name.strip() for name in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {name[len(TAG):] for name in refs if name.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {name for name in refs if re.search(r'\d', name)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if not ref.startswith(tag_prefix):
            continue
        version = ref[len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
        return {"version": version,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False, "error": None,
                "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
|
||||
|
||||
|
||||
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Args:
        tag_prefix: prefix a tag must start with to be considered; it is
            stripped from the tag before it is stored as "closest-tag".
        root: directory passed as cwd to every git invocation.
        verbose: when true, print diagnostics before failing.
        run_command: callable used to invoke git; it is unpacked as an
            (output, return code) pair.

    Returns:
        A dict with the keys "long", "short", "error" and "dirty", plus
        "closest-tag", "distance" and "date" when parsing succeeds. When
        the describe output cannot be parsed, or the tag does not start
        with tag_prefix, the dict is returned early with "error" set and
        without those last three keys.

    Raises:
        NotThisMethod: if 'git rev-parse --git-dir' returns non-zero, or
            'git describe' / 'git rev-parse HEAD' produce no output.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                          hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        # the greedy (.+) lets TAG itself contain hyphens
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        # NOTE(review): unlike describe_out/full_out above, count_out is not
        # checked for None before int(); confirm run_command cannot return
        # None here.
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    # NOTE(review): the output of 'git show' is likewise used without a None
    # check.
    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                       cwd=root)[0].strip()
    # first space becomes "T", the second one is dropped
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
|
||||
|
||||
|
||||
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    Args:
        pieces: mapping of version pieces; only "closest-tag" is read.

    Returns:
        "." when the closest tag already contains a "+", otherwise "+".
    """
    # "closest-tag" may be missing or explicitly None (git_pieces_from_vcs
    # stores None when no tag matches); treat both as an empty tag instead of
    # failing on `"+" in None`.
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"
|
||||
|
||||
|
||||
def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    The result looks like TAG[+DISTANCE.gHEX[.dirty]]. A tagged build that
    is then dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        version = "0+untagged.%d.g%s" % (pieces["distance"],
                                         pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
        return version

    version = tag
    if pieces["distance"] or pieces["dirty"]:
        version += plus_or_dot(pieces)
        version += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
    return version
|
||||
|
||||
|
||||
def render_pep440_pre(pieces):
    """Render pieces as TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]

    distance = pieces["distance"]
    if not distance:
        # exactly on the tag: nothing to append
        return tag
    return tag + (".post.dev%d" % distance)
|
||||
|
||||
|
||||
def render_pep440_post(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        version = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            version += ".dev0"
        return version + "+g%s" % pieces["short"]

    if not (pieces["distance"] or pieces["dirty"]):
        # clean checkout sitting exactly on the tag
        return tag

    version = tag + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version + plus_or_dot(pieces) + "g%s" % pieces["short"]
|
||||
|
||||
|
||||
def render_pep440_old(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]].

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # clean checkout sitting exactly on the tag
            return tag
        version = tag + ".post%d" % pieces["distance"]
    else:
        # exception #1
        version = "0.post%d" % pieces["distance"]

    if pieces["dirty"]:
        version += ".dev0"
    return version
|
||||
|
||||
|
||||
def render_git_describe(pieces):
    """Render pieces as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag

    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
|
||||
|
||||
|
||||
def render_git_describe_long(pieces):
    """Render pieces as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]

    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
|
||||
|
||||
|
||||
def render(pieces, style):
    """Render the given version pieces into the requested style.

    If pieces["error"] is set, a placeholder result with version "unknown"
    is returned and no renderer is called. An empty style or "default"
    selects "pep440". An unrecognised style raises ValueError.
    """
    error = pieces["error"]
    if error:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": error,
                "date": None}

    # the default
    chosen = style if style and style != "default" else "pep440"

    if chosen == "pep440":
        version = render_pep440(pieces)
    elif chosen == "pep440-pre":
        version = render_pep440_pre(pieces)
    elif chosen == "pep440-post":
        version = render_pep440_post(pieces)
    elif chosen == "pep440-old":
        version = render_pep440_old(pieces)
    elif chosen == "git-describe":
        version = render_git_describe(pieces)
    elif chosen == "git-describe-long":
        version = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % chosen)

    return {"version": version,
            "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"],
            "error": None,
            "date": pieces.get("date")}
|
||||
|
||||
|
||||
def get_versions():
    """Get version information or return default if unable to do so.

    Sources are tried in order: expanded git-archive keywords, the git
    checkout located by walking up from __file__, and finally
    versions_from_parentdir (only when cfg.parentdir_prefix is set). A
    source that raises NotThisMethod is skipped.
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def _unknown(error):
        # shared shape of the two "could not determine a version" results
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": error,
                "date": None}

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Climb one
        # directory per path component to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return _unknown("unable to find root of source tree")

    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose),
                      cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return _unknown("unable to compute version")
|
|
@ -0,0 +1,354 @@
|
|||
import hmac
|
||||
from uuid import uuid4
|
||||
from base64 import b64encode, b64decode
|
||||
import hashlib
|
||||
from stringprep import (
|
||||
in_table_a1, in_table_b1, in_table_c21_c22, in_table_c3, in_table_c4,
|
||||
in_table_c5, in_table_c6, in_table_c7, in_table_c8, in_table_c9,
|
||||
in_table_c12, in_table_d1, in_table_d2)
|
||||
import unicodedata
|
||||
from os import urandom
|
||||
from enum import IntEnum, unique
|
||||
|
||||
# https://tools.ietf.org/html/rfc5802
|
||||
# https://www.rfc-editor.org/rfc/rfc7677.txt
|
||||
|
||||
|
||||
@unique
class ClientStage(IntEnum):
    """Ordered steps of the client side of the exchange.

    Each member is named after the ScramClient method that enters that
    stage; _check_stage relies on the values being consecutive from 1 to 4.
    """
    get_client_first = 1
    set_server_first = 2
    get_client_final = 3
    set_server_final = 4
|
||||
|
||||
|
||||
@unique
class ServerStage(IntEnum):
    """Ordered steps of the server side of the exchange.

    Each member is named after the ScramServer method that enters that
    stage; _check_stage relies on the values being consecutive from 1 to 4.
    """
    set_client_first = 1
    get_server_first = 2
    set_client_final = 3
    get_server_final = 4
|
||||
|
||||
|
||||
def _check_stage(Stages, current_stage, next_stage):
|
||||
if current_stage is None:
|
||||
if next_stage != 1:
|
||||
raise ScramException(
|
||||
"The method " + Stages(1).name + " must be called first.")
|
||||
elif current_stage == 4:
|
||||
raise ScramException(
|
||||
"The authentication sequence has already finished.")
|
||||
elif next_stage != current_stage + 1:
|
||||
raise ScramException(
|
||||
"The next method to be called is " + Stages(current_stage + 1) +
|
||||
", not this method.")
|
||||
|
||||
|
||||
class ScramException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# Supported mechanism names. ScramClient scans this tuple in order and keeps
# the last match, so later entries take precedence.
MECHANISMS = ('SCRAM-SHA-1', 'SCRAM-SHA-256')


# Hash constructor used for each mechanism name.
HASHES = {
    'SCRAM-SHA-1': hashlib.sha1,
    'SCRAM-SHA-256': hashlib.sha256
}
|
||||
|
||||
|
||||
class ScramClient():
    """Client side of a SCRAM authentication exchange.

    The four public methods must be called in this order, which is enforced
    through _check_stage: get_client_first, set_server_first,
    get_client_final, set_server_final.
    """

    def __init__(self, mechanisms, username, password, c_nonce=None):
        """Choose a mechanism and store the credentials.

        Args:
            mechanisms: container of mechanism names offered by the server.
            username: user name sent in the client-first message.
            password: password used to compute the client proof.
            c_nonce: client nonce; a fresh one is generated when None.

        Raises:
            ScramException: if none of MECHANISMS is found in mechanisms.
        """
        self.mech = None
        # MECHANISMS is scanned in full, so when several are offered the
        # last one listed in MECHANISMS wins.
        for mech in MECHANISMS:
            if mech in mechanisms:
                self.mech = mech

        if self.mech is None:
            # str() is needed because mechanisms is usually a list or tuple;
            # concatenating it directly raises TypeError and hides this error.
            raise ScramException(
                "The only recognized mechanisms are " + str(MECHANISMS) +
                " and none of those can be found in " + str(mechanisms) +
                ".")

        self.hf = HASHES[self.mech]

        if c_nonce is None:
            self.c_nonce = _make_nonce()
        else:
            self.c_nonce = c_nonce

        self.username = username
        self.password = password
        self.stage = None

    def _set_stage(self, next_stage):
        """Validate the transition to next_stage, then record it."""
        _check_stage(ClientStage, self.stage, next_stage)
        self.stage = next_stage

    def get_client_first(self):
        """Return the client-first message and remember its bare form."""
        self._set_stage(ClientStage.get_client_first)
        self.client_first_bare, client_first = _get_client_first(
            self.username, self.c_nonce)
        return client_first

    def set_server_first(self, message):
        """Consume the server-first message.

        Stores the auth message, combined nonce, salt and iteration count
        returned by _set_server_first.
        """
        self._set_stage(ClientStage.set_server_first)
        self.server_first = message
        self.auth_message, self.nonce, self.salt, self.iterations = \
            _set_server_first(message, self.c_nonce, self.client_first_bare)

    def get_client_final(self):
        """Return the client-final message; keeps the expected server
        signature for set_server_final."""
        self._set_stage(ClientStage.get_client_final)
        self.server_signature, cfinal = _get_client_final(
            self.hf, self.password, self.salt, self.iterations, self.nonce,
            self.auth_message)
        return cfinal

    def set_server_final(self, message):
        """Check the server-final message against the expected signature."""
        self._set_stage(ClientStage.set_server_final)
        _set_server_final(message, self.server_signature)
|
||||
|
||||
|
||||
class ScramServer():
    """Server side of a SCRAM authentication exchange.

    The four public methods must be called in this order, which is enforced
    through _check_stage: set_client_first, get_server_first,
    set_client_final, get_server_final.
    """

    def __init__(
            self, password_fn, s_nonce=None, iterations=4096, salt=None,
            mechanism='SCRAM-SHA-256'):
        """Configure the server.

        Args:
            password_fn: callable invoked with the user name from the
                client-first message; its result is stored as the password.
            s_nonce: server nonce; a fresh one is generated when None.
            iterations: iteration count sent in the server-first message.
            salt: base64 salt string; 16 random bytes are used when None.
            mechanism: one of MECHANISMS.

        Raises:
            ScramException: if mechanism is not in MECHANISMS.
        """
        if mechanism not in MECHANISMS:
            raise ScramException(
                "The only recognized mechanisms are " + str(MECHANISMS) +
                ".")
        self.mechanism = mechanism
        self.hf = HASHES[self.mechanism]

        self.s_nonce = _make_nonce() if s_nonce is None else s_nonce
        self.salt = _b64enc(urandom(16)) if salt is None else salt

        self.password_fn = password_fn
        self.iterations = iterations
        self.stage = None

    def _set_stage(self, next_stage):
        """Validate the transition to next_stage, then record it."""
        _check_stage(ServerStage, self.stage, next_stage)
        self.stage = next_stage

    def set_client_first(self, client_first):
        """Consume the client-first message and look up the password."""
        self._set_stage(ServerStage.set_client_first)
        parsed = _set_client_first(client_first, self.s_nonce)
        self.nonce, self.user, self.client_first_bare = parsed
        self.password = self.password_fn(self.user)

    def get_server_first(self):
        """Return the server-first message and store the auth message."""
        self._set_stage(ServerStage.get_server_first)
        self.auth_message, server_first = _get_server_first(
            self.nonce, self.salt, self.iterations, self.client_first_bare)
        return server_first

    def set_client_final(self, client_final):
        """Verify the client-final message; keeps the server signature."""
        self._set_stage(ServerStage.set_client_final)
        self.server_signature = _set_client_final(
            self.hf, client_final, self.s_nonce, self.password, self.salt,
            self.iterations, self.auth_message)

    def get_server_final(self):
        """Return the server-final message."""
        self._set_stage(ServerStage.get_server_final)
        return _get_server_final(self.server_signature)
|
||||
|
||||
|
||||
def _make_nonce():
|
||||
return str(uuid4()).replace('-', '')
|
||||
|
||||
|
||||
def _make_auth_message(nonce, client_first_bare, server_first):
    """Build the auth message that both sides sign.

    It is the comma-joined client-first-bare message, server-first message
    and the client-final message without its proof.
    """
    client_final_without_proof = 'c=' + _b64enc(b'n,,') + ',r=' + nonce
    return ','.join(
        [client_first_bare, server_first, client_final_without_proof])
|
||||
|
||||
|
||||
def _proof_signature(hf, password, salt, iterations, auth_msg):
    """Compute the client proof and server signature for auth_msg.

    Args:
        hf: hash constructor (a value of HASHES).
        password: clear-text password; normalised with saslprep.
        salt: base64-encoded salt.
        iterations: iteration count for the salted-password derivation.
        auth_msg: the auth message string.

    Returns:
        A (client_proof, server_signature) pair of base64 strings.
    """
    auth_bytes = _uenc(auth_msg)
    salted = _hi(hf, _uenc(saslprep(password)), _b64dec(salt), iterations)

    # client side: proof = ClientKey XOR HMAC(H(ClientKey), auth message)
    client_key = _hmac(hf, salted, b"Client Key")
    client_sig = _hmac(hf, _h(hf, client_key), auth_bytes)
    proof = _xor(client_key, client_sig)

    # server side: signature = HMAC(ServerKey, auth message)
    server_sig = _hmac(hf, _hmac(hf, salted, b"Server Key"), auth_bytes)

    return _b64enc(proof), _b64enc(server_sig)
|
||||
|
||||
|
||||
def _hmac(hf, key, msg):
|
||||
return hmac.new(key, msg=msg, digestmod=hf).digest()
|
||||
|
||||
|
||||
def _h(hf, msg):
|
||||
return hf(msg).digest()
|
||||
|
||||
|
||||
def _hi(hf, password, salt, iterations):
    """Derive the salted password (the Hi() function of RFC 5802).

    The first block is HMAC(password, salt + INT(1)); each later block is
    the HMAC of the previous one, and all blocks are XOR-ed together.
    """
    block = _hmac(hf, password, salt + b'\x00\x00\x00\x01')
    result = block
    for _ in range(iterations - 1):
        block = _hmac(hf, password, block)
        result = _xor(result, block)
    return result
|
||||
|
||||
|
||||
def _hi_iter(password, mac, iterations):
    """Recursively XOR `iterations` chained HMAC values into `mac`.

    NOTE(review): `_hmac` is called here with two arguments, while the
    `_hmac` defined in this module takes three (hf, key, msg), so any call
    with iterations != 0 would raise TypeError. No caller is visible in this
    part of the file -- this looks like leftover code; confirm before using
    or removing it.
    """
    if iterations == 0:
        return mac
    else:
        new_mac = _hmac(password, mac)
        return _xor(_hi_iter(password, new_mac, iterations-1), mac)
|
||||
|
||||
|
||||
def _parse_message(msg):
|
||||
return dict((e[0], e[2:]) for e in msg.split(',') if len(e) > 1)
|
||||
|
||||
|
||||
def _b64enc(binary):
|
||||
return b64encode(binary).decode('utf8')
|
||||
|
||||
|
||||
def _b64dec(string):
|
||||
return b64decode(string)
|
||||
|
||||
|
||||
def _uenc(string):
|
||||
return string.encode('utf-8')
|
||||
|
||||
|
||||
def _xor(bytes1, bytes2):
|
||||
return bytes(a ^ b for a, b in zip(bytes1, bytes2))
|
||||
|
||||
|
||||
def _get_client_first(username, c_nonce):
    """Build the client-first message.

    Returns:
        A (bare, full) pair: the bare message "n=<user>,r=<nonce>" with the
        user name passed through saslprep, and the same message prefixed
        with the "n,," header.
    """
    bare = 'n=' + saslprep(username) + ',' + 'r=' + c_nonce
    full = 'n,,' + bare
    return bare, full
|
||||
|
||||
|
||||
def _set_client_first(client_first, s_nonce):
    """Parse the client-first message on the server side.

    Returns:
        A (nonce, user, client_first_bare) triple: the client nonce with
        s_nonce appended, the 'n' field, and the message without its first
        three characters (the header).
    """
    fields = _parse_message(client_first)
    combined_nonce = fields['r'] + s_nonce
    return combined_nonce, fields['n'], client_first[3:]
|
||||
|
||||
|
||||
def _get_server_first(nonce, salt, iterations, client_first_bare):
    """Build the server-first message and the auth message derived from it.

    Returns:
        An (auth_msg, server_first) pair.
    """
    server_first = 'r=' + nonce + ',' + 's=' + salt + ',' + \
        'i=' + str(iterations)
    return (
        _make_auth_message(nonce, client_first_bare, server_first),
        server_first,
    )
|
||||
|
||||
|
||||
def _set_server_first(server_first, c_nonce, client_first_bare):
    """Parse the server-first message on the client side.

    Returns:
        An (auth_msg, nonce, salt, iterations) tuple taken from the 'r',
        's' and 'i' fields, with iterations converted to int.

    Raises:
        ScramException: if the returned nonce does not start with c_nonce.
    """
    fields = _parse_message(server_first)
    combined_nonce = fields['r']
    salt = fields['s']
    rounds = int(fields['i'])

    if not combined_nonce.startswith(c_nonce):
        raise ScramException("Client nonce doesn't match.")

    return (
        _make_auth_message(combined_nonce, client_first_bare, server_first),
        combined_nonce,
        salt,
        rounds,
    )
|
||||
|
||||
|
||||
def _get_client_final(hf, password, salt, iterations, nonce, auth_msg):
    """Build the client-final message.

    Returns:
        A (server_signature, client_final) pair; the signature is what the
        server-final message is expected to carry.
    """
    proof, expected_signature = _proof_signature(
        hf, password, salt, iterations, auth_msg)

    client_final = ','.join(
        ('c=' + _b64enc(b'n,,'), 'r=' + nonce, 'p=' + proof))
    return expected_signature, client_final
|
||||
|
||||
|
||||
def _set_client_final(
        hf, client_final, s_nonce, password, salt, iterations, auth_msg):
    """Verify the client-final message on the server side.

    Args:
        hf: hash constructor (a value of HASHES).
        client_final: the client-final message received from the client.
        s_nonce: the server's own nonce.
        password: the user's password.
        salt: base64-encoded salt.
        iterations: iteration count.
        auth_msg: the auth message string.

    Returns:
        The base64 server signature to send back to the client.

    Raises:
        ScramException: if the nonce does not end with s_nonce or the
            client proof is wrong.
    """
    msg = _parse_message(client_final)
    nonce = msg['r']
    proof = msg['p']

    if not nonce.endswith(s_nonce):
        raise ScramException("Server nonce doesn't match.")

    client_proof, server_signature = _proof_signature(
        hf, password, salt, iterations, auth_msg)

    # The proof comes from the untrusted peer: compare in constant time so
    # the check does not leak how many leading characters matched. Both sides
    # are compared as bytes because compare_digest rejects non-ASCII str;
    # 'surrogatepass' keeps a malformed proof a plain mismatch rather than an
    # encoding error.
    if not hmac.compare_digest(
            client_proof.encode('utf-8'),
            proof.encode('utf-8', 'surrogatepass')):
        raise ScramException("The proofs don't match")

    return server_signature
|
||||
|
||||
|
||||
def _get_server_final(server_signature):
|
||||
return 'v=' + server_signature
|
||||
|
||||
|
||||
def _set_server_final(message, server_signature):
    """Verify the server-final message on the client side.

    Args:
        message: the server-final message received from the server.
        server_signature: the base64 signature the client computed itself.

    Raises:
        ScramException: if the 'v' field differs from server_signature.
    """
    msg = _parse_message(message)
    # Constant-time comparison of the peer-supplied signature. Both sides
    # are compared as bytes because compare_digest rejects non-ASCII str;
    # 'surrogatepass' keeps a malformed value a plain mismatch.
    if not hmac.compare_digest(
            server_signature.encode('utf-8'),
            msg['v'].encode('utf-8', 'surrogatepass')):
        raise ScramException("The server signature doesn't match.")
|
||||
|
||||
|
||||
def saslprep(source):
    """Normalise source with the stringprep tables used for SASL.

    Returns:
        The mapped and NFKC-normalised string ('' if nothing is left).

    Raises:
        ValueError: if the result contains a prohibited character or an
            invalid bi-directional sequence.
    """
    # mapping stage
    #   - strip 'commonly mapped to nothing' chars (stringprep B.1)
    #   - map non-ascii spaces to U+0020 (stringprep C.1.2)
    mapped = []
    for ch in source:
        if in_table_b1(ch):
            continue
        mapped.append(' ' if in_table_c12(ch) else ch)

    # normalize to KC form
    data = unicodedata.normalize('NFKC', ''.join(mapped))
    if not data:
        return ''

    # check for invalid bi-directional strings.
    # stringprep requires the following:
    #   - chars in C.8 must be prohibited.
    #   - if any R/AL chars in string:
    #       - no L chars allowed in string
    #       - first and last must be R/AL chars
    # If the string starts with an R/AL char it must also end with one, and
    # the loop below then forbids all L chars. Otherwise the loop forbids
    # all R/AL chars instead. C.8 is handled by the loop in both cases.
    if in_table_d1(data[0]):
        if not in_table_d1(data[-1]):
            raise ValueError("malformed bidi sequence")
        # forbid L chars within R/AL sequence.
        bidi_check = in_table_d2
    else:
        # forbid R/AL chars if start not setup correctly; L chars allowed.
        bidi_check = in_table_d1

    # prohibited output: stringprep tables A.1, C.2 - C.9, plus bidi rule
    prohibited = (
        (in_table_a1, "unassigned code points forbidden"),
        (in_table_c21_c22, "control characters forbidden"),
        (in_table_c3, "private use characters forbidden"),
        (in_table_c4, "non-char code points forbidden"),
        (in_table_c5, "surrogate codes forbidden"),
        (in_table_c6, "non-plaintext chars forbidden"),
        (in_table_c7, "non-canonical chars forbidden"),
        (in_table_c8, "display-modifying/deprecated chars forbidden"),
        (in_table_c9, "tagged characters forbidden"),
        (bidi_check, "forbidden bidi character"),
    )

    for ch in data:
        # check for chars mapping stage should have removed
        assert not in_table_b1(ch), "failed to strip B.1 in mapping stage"
        assert not in_table_c12(ch), "failed to replace C.1.2 in mapping stage"

        # check for forbidden chars
        for is_prohibited, message in prohibited:
            if is_prohibited(ch):
                raise ValueError(message)

    return data
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_info_path(path):
    """Split path into (directory, file name, base name, extension)."""
    directory, filename = os.path.split(path)
    stem, suffix = os.path.splitext(filename)
    return directory, filename, stem, suffix
|
||||
|
||||
|
||||
# Directory containing this file (first element of get_info_path's tuple).
path, *_ = get_info_path(__file__)

# Add that directory to the module search path.
# NOTE(review): presumably so the packages bundled next to this file can be
# imported by their bare names -- confirm against the vendored imports.
sys.path.append(path)

from .requests import *
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
from .core import where
|
||||
|
||||
__version__ = "2019.09.11"
|
|
@ -0,0 +1,2 @@
|
|||
from certifi import where
|
||||
print(where())
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,15 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
certifi.py
|
||||
~~~~~~~~~~
|
||||
|
||||
This module returns the installation location of cacert.pem.
|
||||
"""
|
||||
import os
|
||||
|
||||
|
||||
def where():
    """Return the path of cacert.pem in the directory of this module."""
    here = os.path.dirname(__file__)
    return os.path.join(here, 'cacert.pem')
|
|
@ -0,0 +1,39 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
from .compat import PY2, PY3
|
||||
from .universaldetector import UniversalDetector
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
def detect(byte_str):
    """
    Detect the encoding of the given byte string.

    :param byte_str:     The byte sequence to examine.
    :type byte_str:      ``bytes`` or ``bytearray``
    :raises TypeError:   If byte_str is neither ``bytes`` nor ``bytearray``.
    :returns:            Whatever ``UniversalDetector.close()`` returns
                         after the whole input has been fed to it.
    """
    if isinstance(byte_str, bytearray):
        data = byte_str
    elif isinstance(byte_str, bytes):
        # the detector is always fed a bytearray
        data = bytearray(byte_str)
    else:
        raise TypeError('Expected object of type bytes or bytearray, got: '
                        '{0}'.format(type(byte_str)))

    detector = UniversalDetector()
    detector.feed(data)
    return detector.close()
|
|
@ -0,0 +1,386 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Big5 frequency table
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http://www.edu.tw:81/mandr/>
|
||||
#
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ratio = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table; the trailing "# N" on each row is the running entry count.
|
||||
# Number of entries in BIG5_CHAR_TO_FREQ_ORDER (the table's last row is tagged "# 5376").
BIG5_TABLE_SIZE = 5376
|
||||
|
||||
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
|
||||
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
|
||||
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
|
||||
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
|
||||
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
|
||||
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
|
||||
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
|
||||
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
|
||||
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
|
||||
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
|
||||
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
|
||||
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
|
||||
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
|
||||
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
|
||||
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
|
||||
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
|
||||
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
|
||||
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
|
||||
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
|
||||
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
|
||||
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
|
||||
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
|
||||
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
|
||||
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
|
||||
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
|
||||
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
|
||||
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
|
||||
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
|
||||
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
|
||||
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
|
||||
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
|
||||
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
|
||||
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
|
||||
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
|
||||
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
|
||||
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
|
||||
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
|
||||
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
|
||||
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
|
||||
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
|
||||
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
|
||||
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
|
||||
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
|
||||
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
|
||||
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
|
||||
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
|
||||
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
|
||||
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
|
||||
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
|
||||
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
|
||||
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
|
||||
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
|
||||
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
|
||||
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
|
||||
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
|
||||
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
|
||||
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
|
||||
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
|
||||
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
|
||||
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
|
||||
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
|
||||
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
|
||||
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
|
||||
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
|
||||
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
|
||||
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
|
||||
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
|
||||
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
|
||||
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
|
||||
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
|
||||
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
|
||||
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
|
||||
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
|
||||
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
|
||||
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
|
||||
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
|
||||
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
|
||||
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
|
||||
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
|
||||
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
|
||||
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
|
||||
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
|
||||
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
|
||||
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
|
||||
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
|
||||
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
|
||||
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
|
||||
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
|
||||
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
|
||||
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
|
||||
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
|
||||
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
|
||||
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
|
||||
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
|
||||
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
|
||||
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
|
||||
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
|
||||
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
|
||||
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
|
||||
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
|
||||
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
|
||||
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
|
||||
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
|
||||
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
|
||||
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
|
||||
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
|
||||
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
|
||||
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
|
||||
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
|
||||
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
|
||||
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
|
||||
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
|
||||
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
|
||||
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
|
||||
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
|
||||
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
|
||||
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
|
||||
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
|
||||
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
|
||||
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
|
||||
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
|
||||
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
|
||||
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
|
||||
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
|
||||
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
|
||||
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
|
||||
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
|
||||
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
|
||||
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
|
||||
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
|
||||
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
|
||||
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
|
||||
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
|
||||
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
|
||||
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
|
||||
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
|
||||
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
|
||||
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
|
||||
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
|
||||
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
|
||||
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
|
||||
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
|
||||
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
|
||||
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
|
||||
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
|
||||
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
|
||||
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
|
||||
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
|
||||
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
|
||||
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
|
||||
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
|
||||
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
|
||||
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
|
||||
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
|
||||
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
|
||||
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
|
||||
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
|
||||
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
|
||||
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
|
||||
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
|
||||
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
|
||||
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
|
||||
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
|
||||
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
|
||||
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
|
||||
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
|
||||
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
|
||||
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
|
||||
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
|
||||
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
|
||||
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
|
||||
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
|
||||
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
|
||||
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
|
||||
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
|
||||
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
|
||||
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
|
||||
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
|
||||
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
|
||||
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
|
||||
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
|
||||
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
|
||||
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
|
||||
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
|
||||
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
|
||||
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
|
||||
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
|
||||
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
|
||||
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
|
||||
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
|
||||
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
|
||||
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
|
||||
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
|
||||
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
|
||||
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
|
||||
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
|
||||
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
|
||||
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
|
||||
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
|
||||
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
|
||||
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
|
||||
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
|
||||
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
|
||||
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
|
||||
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
|
||||
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
|
||||
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
|
||||
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
|
||||
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
|
||||
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
|
||||
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
|
||||
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
|
||||
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
|
||||
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
|
||||
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
|
||||
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
|
||||
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
|
||||
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
|
||||
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
|
||||
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
|
||||
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
|
||||
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
|
||||
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
|
||||
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
|
||||
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
|
||||
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
|
||||
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
|
||||
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
|
||||
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
|
||||
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
|
||||
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
|
||||
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
|
||||
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
|
||||
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
|
||||
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
|
||||
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
|
||||
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
|
||||
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
|
||||
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
|
||||
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
|
||||
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
|
||||
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
|
||||
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
|
||||
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
|
||||
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
|
||||
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
|
||||
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
|
||||
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
|
||||
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
|
||||
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
|
||||
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
|
||||
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
|
||||
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
|
||||
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
|
||||
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
|
||||
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
|
||||
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
|
||||
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
|
||||
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
|
||||
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
|
||||
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
|
||||
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
|
||||
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
|
||||
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
|
||||
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
|
||||
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
|
||||
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
|
||||
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
|
||||
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
|
||||
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
|
||||
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
|
||||
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
|
||||
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
|
||||
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
|
||||
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
|
||||
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
|
||||
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
|
||||
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
|
||||
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
|
||||
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
|
||||
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
|
||||
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
|
||||
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
|
||||
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
|
||||
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
|
||||
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
|
||||
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
|
||||
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
|
||||
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
|
||||
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
|
||||
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
|
||||
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
|
||||
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
|
||||
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
|
||||
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
|
||||
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
|
||||
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
|
||||
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
|
||||
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
|
||||
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
|
||||
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
|
||||
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
|
||||
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
|
||||
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
|
||||
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
|
||||
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
|
||||
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
|
||||
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
|
||||
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
|
||||
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
|
||||
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
|
||||
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
|
||||
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
|
||||
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
|
||||
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
|
||||
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
|
||||
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
|
||||
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
|
||||
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
|
||||
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
|
||||
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
|
||||
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
|
||||
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
|
||||
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
|
||||
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
|
||||
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
|
||||
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
|
||||
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
|
||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||
)
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import Big5DistributionAnalysis
|
||||
from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober configured for the Big5 encoding.

    Wires the Big5 state-machine model and the Big5 character
    distribution analyser into the generic multi-byte prober.
    """

    def __init__(self):
        super(Big5Prober, self).__init__()
        # Both helpers must exist before reset() is called.
        self.distribution_analyzer = Big5DistributionAnalysis()
        self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "Big5"

    @property
    def language(self):
        """Language associated with this charset."""
        return "Chinese"
|
|
@ -0,0 +1,233 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
|
||||
|
||||
class CharDistributionAnalysis(object):
    """Base class for character-frequency based confidence estimation.

    Subclasses supply a frequency table, its size, a typical distribution
    ratio, and a ``get_order`` implementation that maps a two-byte
    character to an index into that table.
    """

    ENOUGH_DATA_THRESHOLD = 1024
    SURE_YES = 0.99
    SURE_NO = 0.01
    MINIMUM_DATA_THRESHOLD = 3

    def __init__(self):
        # Table mapping a character order (see get_order) to its
        # frequency order; filled in by subclasses.
        self._char_to_freq_order = None
        # Number of entries in the table above.
        self._table_size = None
        # Language-dependent constant used when computing confidence. See
        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
        self.typical_distribution_ratio = None
        self._done = None
        self._total_chars = None
        self._freq_chars = None
        self.reset()

    def reset(self):
        """Clear all accumulated counters and the done flag."""
        # Set to True once detection is finished and a conclusion reached.
        self._done = False
        # Count of characters with a valid (non-negative) order.
        self._total_chars = 0
        # Count of characters whose frequency order is below 512.
        self._freq_chars = 0

    def feed(self, char, char_len):
        """Account for one character of known byte length.

        Only two-byte characters are considered; anything else is ignored.
        """
        order = self.get_order(char) if char_len == 2 else -1
        if order < 0:
            return
        self._total_chars += 1
        # Only orders inside the table can be looked up.
        if order < self._table_size and self._char_to_freq_order[order] < 512:
            self._freq_chars += 1

    def get_confidence(self):
        """Return a confidence value derived from the counters so far."""
        total = self._total_chars
        frequent = self._freq_chars

        # Nothing (or too little) in our range of interest: negative answer.
        if total <= 0 or frequent <= self.MINIMUM_DATA_THRESHOLD:
            return self.SURE_NO

        if total != frequent:
            ratio = (frequent / ((total - frequent)
                                 * self.typical_distribution_ratio))
            if ratio < self.SURE_YES:
                return ratio

        # Cap the confidence: we never want to claim 100% certainty.
        return self.SURE_YES

    def got_enough_data(self):
        """Tell whether enough characters were seen to draw a conclusion."""
        # Seeing the whole input is unnecessary; a certain amount suffices.
        return self._total_chars > self.ENOUGH_DATA_THRESHOLD

    def get_order(self, byte_str):
        """Map a character's bytes to an order number; base returns -1.

        Working on a numeric order rather than on the raw encoded bytes
        lets several encodings of one language share a frequency table.
        """
        return -1
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis configured with the EUC-TW frequency data."""

    def __init__(self):
        super(EUCTWDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = EUCTW_TABLE_SIZE
        self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte EUC-TW character, or -1.

        Range of interest:
          first byte:  0xc4 -- 0xfe
          second byte: 0xa1 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead = byte_str[0]
        if lead < 0xC4:
            return -1
        return 94 * (lead - 0xC4) + byte_str[1] - 0xA1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis configured with the EUC-KR frequency data."""

    def __init__(self):
        super(EUCKRDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = EUCKR_TABLE_SIZE
        self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte EUC-KR character, or -1.

        Range of interest:
          first byte:  0xb0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead = byte_str[0]
        if lead < 0xB0:
            return -1
        return 94 * (lead - 0xB0) + byte_str[1] - 0xA1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis configured with the GB2312 frequency data."""

    def __init__(self):
        super(GB2312DistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = GB2312_TABLE_SIZE
        self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte GB2312 character, or -1.

        Range of interest:
          first byte:  0xb0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead, trail = byte_str[0], byte_str[1]
        if lead < 0xB0 or trail < 0xA1:
            return -1
        return 94 * (lead - 0xB0) + trail - 0xA1
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis configured with the Big5 frequency data."""

    def __init__(self):
        super(Big5DistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = BIG5_TABLE_SIZE
        self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte Big5 character, or -1.

        Range of interest:
          first byte:  0xa4 -- 0xfe
          second byte: 0x40 -- 0x7e, 0xa1 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead, trail = byte_str[0], byte_str[1]
        if lead < 0xA4:
            return -1
        row_start = 157 * (lead - 0xA4)
        if trail >= 0xA1:
            # Upper trail-byte range is placed after the 63 lower slots.
            return row_start + trail - 0xA1 + 63
        return row_start + trail - 0x40
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for Shift_JIS using the JIS frequency data."""

    def __init__(self):
        super(SJISDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = JIS_TABLE_SIZE
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte Shift_JIS character, or -1.

        Range of interest:
          first byte:  0x81 -- 0x9f, 0xe0 -- 0xfe
          second byte: 0x40 -- 0x7e, 0x81 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead, trail = byte_str[0], byte_str[1]
        if 0x81 <= lead <= 0x9F:
            base = 188 * (lead - 0x81)
        elif 0xE0 <= lead <= 0xEF:
            base = 188 * (lead - 0xE0 + 31)
        else:
            return -1
        # Second bytes above 0x7F are reported as "no order".
        if trail > 0x7F:
            return -1
        return base + trail - 0x40
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis for EUC-JP using the JIS frequency data."""

    def __init__(self):
        super(EUCJPDistributionAnalysis, self).__init__()
        self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
        self._table_size = JIS_TABLE_SIZE
        self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER

    def get_order(self, byte_str):
        """Return the table order of a two-byte EUC-JP character, or -1.

        Range of interest:
          first byte:  0xa0 -- 0xfe
          second byte: 0xa1 -- 0xfe
        Bytes are not validated here; the state machine already did that.
        """
        lead = byte_str[0]
        if lead < 0xA0:
            return -1
        return 94 * (lead - 0xA1) + byte_str[1] - 0xA1
|
|
@ -0,0 +1,106 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    Children are stored in ``self.probers``. Input is fed to every child
    that is still active, and the child with the highest confidence is
    remembered as the best guess.
    """

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        self._active_num = 0
        self.probers = []
        self._best_guess_prober = None

    def reset(self):
        """Reset this prober and re-activate every child prober."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # get_confidence() selects the best child as a side effect.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language of the best-guess child, or None if there is none."""
        if not self._best_guess_prober:
            # get_confidence() selects the best child as a side effect.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed ``byte_str`` to each active child and return the group state.

        A child answering FOUND_IT becomes the best guess and moves the
        group itself into FOUND_IT. A child answering NOT_ME is
        deactivated; when no active child remains the group becomes NOT_ME.
        """
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                # Falsy state (DETECTING is 0): this child is undecided.
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Record the positive result on the group as well, so that
                # ``state`` and get_confidence() reflect it.
                self._state = ProbingState.FOUND_IT
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the highest confidence among the active children.

        Returns 0.99 when the group state is FOUND_IT and 0.01 when it is
        NOT_ME. Otherwise the best child is re-selected and its confidence
        returned, or 0.0 when no child reports a confidence above zero.
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf
|
|
@ -0,0 +1,145 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import ProbingState
|
||||
|
||||
|
||||
class CharSetProber(object):
    """Base class for charset probers, plus shared byte-filter helpers."""

    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        self._state = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)

    def reset(self):
        """Put the prober back into the DETECTING state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        """Charset name; the base class has none and returns None."""
        return None

    def feed(self, buf):
        """Consume a buffer; the base implementation does nothing."""
        pass

    @property
    def state(self):
        """Current probing state."""
        return self._state

    def get_confidence(self):
        """Confidence of this prober; the base class returns 0.0."""
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """Collapse every run of bytes 0x00-0x7F in ``buf`` to one space."""
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """
        Keep only the words of ``buf`` that contain a high byte.

        Three kinds of bytes are distinguished:
            alphabet: English letters, a-z and A-Z
            international: bytes 0x80-0xFF
            marker: every other byte

        The buffer is viewed as words separated by markers. Only words
        holding at least one international byte are kept, and a marker that
        directly follows such a word is replaced by a single ASCII space.

        This filter applies to all scripts which do not use English
        characters.
        """
        # Matches a word with at least one international byte, optionally
        # followed by one trailing marker byte.
        pattern = b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?'
        kept = bytearray()

        for word in re.findall(pattern, buf):
            body, tail = word[:-1], word[-1:]
            kept.extend(body)
            # A trailing marker is normalised to a space: markers are used
            # alike across languages and should not sway the analysis.
            if tail < b'\x80' and not tail.isalpha():
                tail = b' '
            kept.extend(tail)

        return kept

    @staticmethod
    def filter_with_english_letters(buf):
        """
        Return a filtered copy of ``buf`` for mixed English/high-byte text.

        Runs of English letters and high bytes are kept when they end at a
        delimiter (any other byte below 0x80) while no ``<`` is pending,
        which includes a run ending at a ``>``. Each kept run is followed
        by one space. A run ending at a ``<`` is dropped.

        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used
        by ``Latin1Prober``.
        """
        kept = bytearray()
        inside_tag = False
        run_start = 0

        for pos in range(len(buf)):
            # Slice (not index) so we get bytes rather than an int on Py3.
            ch = buf[pos:pos + 1]

            # Track entry into / exit from an HTML tag.
            if ch == b'>':
                inside_tag = False
            elif ch == b'<':
                inside_tag = True

            is_delimiter = ch < b'\x80' and not ch.isalpha()
            if not is_delimiter:
                continue

            if pos > run_start and not inside_tag:
                # Emit the run that just ended, then a separating space.
                kept.extend(buf[run_start:pos])
                kept.extend(b' ')
            run_start = pos + 1

        if not inside_tag:
            # Emit whatever follows the last delimiter.
            kept.extend(buf[run_start:])

        return kept
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Script which takes one or more file paths and reports on their detected
|
||||
encodings
|
||||
|
||||
Example::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
If no paths are provided, it takes its input from stdin.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from chardet import __version__
|
||||
from chardet.compat import PY2
|
||||
from chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name='stdin'):
    """
    Describe the probable encoding of a file or list of byte strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``"<name>: <encoding> with confidence <value>"`` or
              ``"<name>: no result"`` when no encoding was detected.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop reading once the detector has decided - particularly useful
        # if we read a BOM.
        if detector.done:
            break
    detector.close()
    outcome = detector.result
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if not outcome['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, outcome['encoding'],
                                                 outcome['confidence'])
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Parse the command line and report the encoding of each input.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Binary stdin is the fallback when no path is given.
    default_stream = sys.stdin if PY2 else sys.stdin.buffer

    parser = argparse.ArgumentParser(
        description=("Takes one or more file paths and reports their "
                     "detected encodings"))
    parser.add_argument(
        'input',
        help=('File whose encoding we would like to determine. '
              '(default: stdin)'),
        type=argparse.FileType('rb'),
        nargs='*',
        default=[default_stream])
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Interactive use: tell the user how to end the input.
            print("You are running chardetect interactively. Press "
                  "CTRL-D twice at the start of a blank line to signal the "
                  "end of your input. If you want help, run chardetect "
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,88 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
class CodingStateMachine(object):
    """
    A state machine to verify a byte sequence for a particular encoding. For
    each byte the detector receives, it will feed that byte to every active
    state machine available, one byte at a time. The state machine changes its
    state based on its previous state and the byte it receives. There are 3
    states in a state machine that are of interest to an auto-detector:

    START state: This is the state to start with, or a legal byte sequence
                 (i.e. a valid code point) for character has been identified.

    ME state:  This indicates that the state machine identified a byte sequence
               that is specific to the charset it is designed for and that
               there is no other possible encoding which can contain this byte
               sequence. This will lead to an immediate positive answer for
               the detector.

    ERROR state: This indicates the state machine identified an illegal byte
                 sequence for that encoding. This will lead to an immediate
                 negative answer for this encoding. Detector will exclude this
                 encoding from consideration from here on.
    """

    def __init__(self, sm):
        self._model = sm
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Return the machine to the START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Advance the machine by one byte ``c`` and return the new state."""
        model = self._model
        # Every byte is first mapped to its class.
        byte_class = model['class_table'][c]
        if self._curr_state == MachineState.START:
            # First byte of a character: its class also gives the length.
            self._curr_byte_pos = 0
            self._curr_char_len = model['char_len_table'][byte_class]
        # The state table is indexed by (state, class), flattened.
        table_index = self._curr_state * model['class_factor'] + byte_class
        self._curr_state = model['state_table'][table_index]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        """Byte length of the character currently being processed."""
        return self._curr_char_len

    def get_coding_state_machine(self):
        """Name stored in the underlying model."""
        return self._model['name']

    @property
    def language(self):
        """Language stored in the underlying model."""
        return self._model['language']
|
|
@ -0,0 +1,34 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# Contributor(s):
|
||||
# Dan Blanchard
|
||||
# Ian Cordasco
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
# Interpreter-version flags and the string types that go with them.
PY2 = sys.version_info < (3, 0)
PY3 = not PY2

if PY2:
    # ``unicode`` only exists on Python 2; this branch never runs on 3.
    base_str = (str, unicode)  # noqa: F821
    text_type = unicode  # noqa: F821
else:
    base_str = (bytes, str)
    text_type = str
|
|
@ -0,0 +1,49 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober configured for the CP949 encoding."""

    def __init__(self):
        super(CP949Prober, self).__init__()
        # NOTE: CP949 is a superset of EUC-KR, so the EUC-KR character
        # distribution is reused here.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "CP949"

    @property
    def language(self):
        """Language associated with this charset."""
        return "Korean"
|
|
@ -0,0 +1,76 @@
|
|||
"""
|
||||
All of the Enums that are used throughout the chardet package.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
|
||||
class InputState(object):
    """
    Constants for the input states a universal detector can be in.
    """
    PURE_ASCII, ESC_ASCII, HIGH_BYTE = 0, 1, 2
|
||||
|
||||
|
||||
class LanguageFilter(object):
    """
    Bit flags for the language filters that can be applied to a
    ``UniversalDetector``.
    """
    # Single-bit flags.
    CHINESE_SIMPLIFIED = 1 << 0
    CHINESE_TRADITIONAL = 1 << 1
    JAPANESE = 1 << 2
    KOREAN = 1 << 3
    NON_CJK = 1 << 4
    # Combinations of the flags above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
    ALL = CJK | NON_CJK
|
||||
|
||||
|
||||
class ProbingState(object):
    """
    Constants for the states a prober can be in.
    """
    DETECTING, FOUND_IT, NOT_ME = 0, 1, 2
|
||||
|
||||
|
||||
class MachineState(object):
    """
    Constants for the states a coding state machine can be in.
    """
    START, ERROR, ITS_ME = 0, 1, 2
|
||||
|
||||
|
||||
class SequenceLikelihood:
    """
    Likelihood of a character following the previous one.

    Plain integer constants; this is not an ``enum.Enum``.
    """

    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        return 4
|
||||
|
||||
|
||||
class CharacterCategory:
    """
    Categories that language models for ``SingleByteCharsetProber`` put
    characters into.

    Anything less than CONTROL is considered a letter.
    """

    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251
|
|
@ -0,0 +1,101 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, ProbingState, MachineState
|
||||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||
ISO2022KR_SM_MODEL)
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
    """
    Prober for encodings identified by escape or shift sequences.

    This CharSetProber uses a "code scheme" approach for detecting encodings,
    whereby easily recognizable escape or shift sequences are relied on to
    identify these encodings. One ``CodingStateMachine`` is created per
    candidate model (HZ, ISO-2022-CN, ISO-2022-JP, ISO-2022-KR), depending on
    which ``LanguageFilter`` bits are set.
    """

    def __init__(self, lang_filter=None):
        """
        Create the state machines selected by ``lang_filter``.

        :param lang_filter: Bit mask of ``LanguageFilter`` flags. ``None``
            (the default) is treated as an empty mask, so no state machine
            is created and the prober never reports a charset.
        """
        super().__init__(lang_filter=lang_filter)
        # ``None & <int>`` raises TypeError, and ``None`` is this method's
        # own default, so normalise it to an empty bit mask before testing.
        # NOTE(review): assumes the base class stores the argument unchanged
        # in ``self.lang_filter`` -- confirm against CharSetProber.
        enabled = 0 if self.lang_filter is None else self.lang_filter
        self.coding_sm = []
        if enabled & LanguageFilter.CHINESE_SIMPLIFIED:
            self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
            self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
        if enabled & LanguageFilter.JAPANESE:
            self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
        if enabled & LanguageFilter.KOREAN:
            self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
        # Declare every attribute up front; reset() assigns the working
        # values for the counters and detection results.
        self.active_sm_count = None
        self._detected_charset = None
        self._detected_language = None
        self._state = None
        self.reset()

    def reset(self):
        """Re-activate every state machine and clear any detection result."""
        super().reset()
        for coding_sm in self.coding_sm:
            if not coding_sm:
                continue
            coding_sm.active = True
            coding_sm.reset()
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self):
        """Charset reported by the matching state machine, or ``None``."""
        return self._detected_charset

    @property
    def language(self):
        """Language reported by the matching state machine, or ``None``."""
        return self._detected_language

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, otherwise 0.00."""
        return 0.99 if self._detected_charset else 0.00

    def feed(self, byte_str):
        """
        Run every still-active state machine over ``byte_str``.

        :param byte_str: Iterable of byte values; each item is handed to
            ``CodingStateMachine.next_state``.
        :returns: ``self.state`` after processing. Processing stops early as
            soon as one machine reports ``ITS_ME`` or the active-machine
            counter drops to zero.
        """
        for c in byte_str:
            for coding_sm in self.coding_sm:
                if not coding_sm or not coding_sm.active:
                    continue
                coding_state = coding_sm.next_state(c)
                if coding_state == MachineState.ERROR:
                    # This machine can no longer match; retire it.
                    coding_sm.active = False
                    self.active_sm_count -= 1
                    if self.active_sm_count <= 0:
                        self._state = ProbingState.NOT_ME
                        return self.state
                elif coding_state == MachineState.ITS_ME:
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = coding_sm.get_coding_state_machine()
                    self._detected_language = coding_sm.language
                    return self.state

        return self.state
|
|
@ -0,0 +1,246 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,4,0,5,2,0, # 78 - 7f
|
||||
1,1,1,1,1,1,1,1, # 80 - 87
|
||||
1,1,1,1,1,1,1,1, # 88 - 8f
|
||||
1,1,1,1,1,1,1,1, # 90 - 97
|
||||
1,1,1,1,1,1,1,1, # 98 - 9f
|
||||
1,1,1,1,1,1,1,1, # a0 - a7
|
||||
1,1,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,1,1,1,1,1,1, # c0 - c7
|
||||
1,1,1,1,1,1,1,1, # c8 - cf
|
||||
1,1,1,1,1,1,1,1, # d0 - d7
|
||||
1,1,1,1,1,1,1,1, # d8 - df
|
||||
1,1,1,1,1,1,1,1, # e0 - e7
|
||||
1,1,1,1,1,1,1,1, # e8 - ef
|
||||
1,1,1,1,1,1,1,1, # f0 - f7
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,4,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,7,0,0,0, # 20 - 27
|
||||
3,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
6,0,4,0,8,0,0,0, # 40 - 47
|
||||
0,9,5,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,3,0,0,0, # 20 - 27
|
||||
0,4,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,5,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState, MachineState
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
    """
    Multi-byte prober that reports the EUC-JP charset (Japanese).

    Combines a coding state machine with a character distribution analysis
    and a context analysis; the reported confidence is the larger of the two
    analyzer confidences.
    """

    def __init__(self):
        """Attach the EUC-JP state machine and both analyzers."""
        super().__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        super().reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "EUC-JP"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Japanese"

    def feed(self, byte_str):
        """
        Feed a chunk of bytes to the state machine and the analyzers.

        :param byte_str: Byte sequence (``bytes``/``bytearray``); an empty
            chunk is accepted and leaves the carried-over byte untouched.
        :returns: ``self.state`` after the chunk has been processed.
        """
        # Iterating a bytes-like object yields ints on Python 3, which is
        # what next_state() was given by the original index-based loop.
        for i, byte in enumerate(byte_str):
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Pair the first byte of this chunk with the byte saved
                    # in _last_char[0] by the previous call.
                    self._last_char[1] = byte
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    pair = byte_str[i - 1:i + 1]
                    self.context_analyzer.feed(pair, char_len)
                    self.distribution_analyzer.feed(pair, char_len)

        # Remember the final byte for the next call. Guarded because
        # byte_str[-1] raises IndexError on an empty chunk.
        if byte_str:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
|
|
@ -0,0 +1,195 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampled from about 20M of text material, including literature and computer technology
|
||||
|
||||
# 128 --> 0.79
|
||||
# 256 --> 0.92
|
||||
# 512 --> 0.986
|
||||
# 1024 --> 0.99944
|
||||
# 2048 --> 0.99999
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
|
||||
# Random Distribution Ratio = 512 / (2350-512) = 0.279.
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||
|
||||
EUCKR_TABLE_SIZE = 2352
|
||||
|
||||
# Char to FreqOrder table
|
||||
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
|
||||
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
|
||||
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
|
||||
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
|
||||
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
|
||||
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
|
||||
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
|
||||
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
|
||||
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
|
||||
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
|
||||
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
|
||||
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
|
||||
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
|
||||
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
|
||||
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
|
||||
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
|
||||
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
|
||||
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
|
||||
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
|
||||
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
|
||||
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
|
||||
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
|
||||
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
|
||||
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
|
||||
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
|
||||
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
|
||||
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
|
||||
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
|
||||
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
|
||||
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
|
||||
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
|
||||
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
|
||||
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
|
||||
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
|
||||
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
|
||||
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
|
||||
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
|
||||
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
|
||||
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
|
||||
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
|
||||
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
|
||||
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
|
||||
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
|
||||
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
|
||||
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
|
||||
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
|
||||
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
|
||||
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
|
||||
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
|
||||
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
|
||||
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
|
||||
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
|
||||
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
|
||||
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
|
||||
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
|
||||
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
|
||||
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
|
||||
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
|
||||
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
|
||||
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
|
||||
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
|
||||
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
|
||||
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
|
||||
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
|
||||
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
|
||||
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
|
||||
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
|
||||
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
|
||||
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
|
||||
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
|
||||
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
|
||||
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
|
||||
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
|
||||
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
|
||||
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
|
||||
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
|
||||
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
|
||||
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
|
||||
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
|
||||
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
|
||||
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
|
||||
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
|
||||
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
|
||||
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
|
||||
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
|
||||
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
|
||||
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
|
||||
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
|
||||
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
|
||||
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
|
||||
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
|
||||
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
|
||||
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
|
||||
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
|
||||
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
|
||||
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
|
||||
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
|
||||
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
|
||||
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
|
||||
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
|
||||
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
|
||||
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
|
||||
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
|
||||
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
|
||||
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
|
||||
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
|
||||
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
|
||||
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
|
||||
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
|
||||
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
|
||||
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
|
||||
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
|
||||
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
|
||||
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
|
||||
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
|
||||
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
|
||||
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
|
||||
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
|
||||
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
|
||||
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
|
||||
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
|
||||
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
|
||||
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
|
||||
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
|
||||
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
|
||||
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
|
||||
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
|
||||
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
|
||||
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
|
||||
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
|
||||
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
|
||||
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
|
||||
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
|
||||
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
|
||||
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
|
||||
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
|
||||
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
|
||||
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
|
||||
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
|
||||
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
|
||||
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
|
||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||
)
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte charset prober configured for EUC-KR.

    Combines the generic multi-byte prober with the EUC-KR coding state
    machine model and the EUC-KR character distribution analyser.
    """

    def __init__(self):
        """Set up the EUC-KR state machine and analyser, then reset."""
        super().__init__()
        # Both collaborators are assigned before reset() is invoked.
        self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Return the charset name reported by this prober."""
        return "EUC-KR"

    @property
    def language(self):
        """Return the language label reported by this prober."""
        return "Korean"
|
|
@ -0,0 +1,387 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# EUCTW frequency table
|
||||
# Converted from big5 work
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http://www.edu.tw:81/mandr/>
|
||||
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) = 2.98
|
||||
# Random Distribution Ratio = 512/(5401-512) = 0.105
|
||||
#
|
||||
# Typical Distribution Ratio is about 25% of the ideal one, still much higher than the RDR
|
||||
|
||||
# Typical distribution ratio for EUC-TW text: about 25% of the ideal
# ratio derived in the statistics above (0.25 * 2.98 ~= 0.75).
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table
|
||||
# NOTE(review): appears to be the entry count of EUCTW_CHAR_TO_FREQ_ORDER;
# the table's row markers run from 2742 to 8102 in steps of 16, which is
# 336 rows x 16 values = 5376 -- confirm against the consumer of this value.
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCTWDistributionAnalysis
|
||||
from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte charset prober that reports the "EUC-TW" charset.

    Construction installs an EUC-TW coding state machine and an EUC-TW
    distribution analyser on the instance and then resets the prober.
    """

    def __init__(self):
        """Install the EUC-TW state machine and analyser, then reset."""
        super().__init__()
        # Built from the EUC-TW state-machine model imported from .mbcssm.
        self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
        self.distribution_analyzer = EUCTWDistributionAnalysis()
        # reset() runs last, after both helpers are in place.
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober reports."""
        return "EUC-TW"

    @property
    def language(self):
        """Language label this prober reports."""
        return "Taiwan"
|
|
@ -0,0 +1,283 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# GB2312 most frequently used character table
|
||||
#
|
||||
# Char to FreqOrder table , from hz6763
|
||||
|
||||
# 512 --> 0.79 -- 0.79
|
||||
# 1024 --> 0.92 -- 0.13
|
||||
# 2048 --> 0.98 -- 0.06
|
||||
# 6768 --> 1.00 -- 0.02
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
|
||||
# Random Distribution Ratio = 512 / (3755 - 512) = 0.157
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
# Distribution-ratio constant for GB2312; 0.9 is roughly 25% of the ideal
# ratio of 3.79 worked out in the comment block above.
# NOTE(review): the code that consumes this value is not visible here.
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||
|
||||
# Size paired with GB2312_CHAR_TO_FREQ_ORDER; 3760 appears to equal the number
# of entries in that tuple (235 rows of 16) -- TODO confirm against the consumer.
GB2312_TABLE_SIZE = 3760
|
||||
|
||||
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
|
||||
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
|
||||
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
|
||||
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
|
||||
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
|
||||
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
|
||||
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
|
||||
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
|
||||
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
|
||||
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
|
||||
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
|
||||
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
|
||||
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
|
||||
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
|
||||
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
|
||||
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
|
||||
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
|
||||
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
|
||||
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
|
||||
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
|
||||
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
|
||||
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
|
||||
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
|
||||
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
|
||||
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
|
||||
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
|
||||
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
|
||||
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
|
||||
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
|
||||
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
|
||||
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648,
|
||||
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992,
|
||||
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233,
|
||||
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157,
|
||||
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807,
|
||||
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094,
|
||||
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258,
|
||||
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478,
|
||||
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152,
|
||||
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909,
|
||||
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272,
|
||||
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221,
|
||||
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252,
|
||||
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301,
|
||||
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254,
|
||||
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070,
|
||||
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461,
|
||||
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360,
|
||||
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124,
|
||||
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535,
|
||||
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243,
|
||||
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713,
|
||||
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071,
|
||||
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442,
|
||||
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946,
|
||||
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257,
|
||||
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180,
|
||||
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427,
|
||||
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781,
|
||||
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724,
|
||||
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937,
|
||||
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943,
|
||||
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789,
|
||||
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552,
|
||||
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246,
|
||||
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451,
|
||||
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310,
|
||||
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860,
|
||||
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297,
|
||||
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780,
|
||||
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745,
|
||||
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936,
|
||||
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032,
|
||||
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657,
|
||||
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414,
|
||||
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976,
|
||||
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436,
|
||||
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254,
|
||||
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536,
|
||||
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238,
|
||||
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059,
|
||||
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741,
|
||||
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447,
|
||||
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601,
|
||||
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269,
|
||||
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894,
|
||||
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173,
|
||||
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994,
|
||||
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956,
|
||||
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437,
|
||||
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154,
|
||||
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240,
|
||||
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143,
|
||||
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634,
|
||||
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472,
|
||||
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541,
|
||||
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143,
|
||||
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312,
|
||||
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414,
|
||||
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754,
|
||||
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424,
|
||||
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302,
|
||||
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739,
|
||||
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004,
|
||||
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484,
|
||||
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739,
|
||||
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535,
|
||||
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641,
|
||||
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307,
|
||||
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573,
|
||||
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533,
|
||||
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965,
|
||||
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99,
|
||||
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280,
|
||||
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505,
|
||||
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012,
|
||||
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039,
|
||||
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982,
|
||||
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530,
|
||||
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392,
|
||||
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656,
|
||||
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220,
|
||||
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766,
|
||||
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535,
|
||||
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728,
|
||||
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338,
|
||||
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627,
|
||||
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885,
|
||||
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411,
|
||||
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671,
|
||||
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162,
|
||||
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774,
|
||||
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524,
|
||||
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346,
|
||||
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040,
|
||||
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188,
|
||||
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280,
|
||||
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131,
|
||||
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947,
|
||||
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970,
|
||||
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814,
|
||||
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557,
|
||||
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997,
|
||||
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972,
|
||||
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369,
|
||||
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376,
|
||||
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480,
|
||||
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610,
|
||||
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128,
|
||||
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769,
|
||||
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207,
|
||||
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392,
|
||||
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623,
|
||||
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782,
|
||||
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650,
|
||||
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478,
|
||||
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773,
|
||||
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007,
|
||||
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323,
|
||||
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598,
|
||||
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961,
|
||||
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302,
|
||||
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409,
|
||||
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683,
|
||||
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191,
|
||||
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616,
|
||||
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302,
|
||||
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774,
|
||||
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147,
|
||||
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731,
|
||||
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464,
|
||||
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377,
|
||||
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315,
|
||||
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557,
|
||||
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903,
|
||||
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060,
|
||||
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261,
|
||||
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092,
|
||||
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810,
|
||||
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708,
|
||||
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658,
|
||||
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871,
|
||||
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503,
|
||||
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229,
|
||||
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112,
|
||||
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504,
|
||||
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389,
|
||||
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27,
|
||||
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542,
|
||||
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861,
|
||||
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845,
|
||||
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700,
|
||||
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469,
|
||||
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582,
|
||||
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999,
|
||||
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274,
|
||||
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020,
|
||||
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601,
|
||||
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628,
|
||||
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31,
|
||||
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668,
|
||||
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778,
|
||||
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169,
|
||||
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667,
|
||||
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881,
|
||||
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276,
|
||||
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320,
|
||||
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751,
|
||||
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432,
|
||||
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772,
|
||||
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843,
|
||||
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116,
|
||||
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904,
|
||||
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652,
|
||||
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664,
|
||||
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770,
|
||||
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283,
|
||||
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626,
|
||||
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713,
|
||||
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333,
|
||||
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062,
|
||||
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555,
|
||||
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014,
|
||||
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510,
|
||||
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015,
|
||||
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459,
|
||||
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390,
|
||||
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238,
|
||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||
)
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import GB2312DistributionAnalysis
|
||||
from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte charset prober that reports the "GB2312" charset.

    Construction installs a GB2312 coding state machine and a GB2312
    distribution analyser on the instance and then resets the prober.
    """

    def __init__(self):
        """Install the GB2312 state machine and analyser, then reset."""
        super().__init__()
        # Built from the GB2312 state-machine model imported from .mbcssm.
        self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
        self.distribution_analyzer = GB2312DistributionAnalysis()
        # reset() runs last, after both helpers are in place.
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober reports."""
        return "GB2312"

    @property
    def language(self):
        """Language label this prober reports."""
        return "Chinese"
|
|
@ -0,0 +1,292 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Shy Shalom
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
|
||||
### General ideas of the Hebrew charset recognition ###
|
||||
#
|
||||
# Four main charsets exist in Hebrew:
|
||||
# "ISO-8859-8" - Visual Hebrew
|
||||
# "windows-1255" - Logical Hebrew
|
||||
# "ISO-8859-8-I" - Logical Hebrew
|
||||
# "x-mac-hebrew" - ?? Logical Hebrew ??
|
||||
#
|
||||
# Both "ISO" charsets use a completely identical set of code points, whereas
|
||||
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
|
||||
# these code points. windows-1255 defines additional characters in the range
|
||||
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
|
||||
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
|
||||
# x-mac-hebrew defines similar additional code points but with a different
|
||||
# mapping.
|
||||
#
|
||||
# As far as an average Hebrew text with no diacritics is concerned, all four
|
||||
# charsets are identical with respect to code points. Meaning that for the
|
||||
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
|
||||
# (including final letters).
|
||||
#
|
||||
# The dominant difference between these charsets is their directionality.
|
||||
# "Visual" directionality means that the text is ordered as if the renderer is
|
||||
# not aware of a BIDI rendering algorithm. The renderer sees the text and
|
||||
# draws it from left to right. The text itself when ordered naturally is read
|
||||
# backwards. A buffer of Visual Hebrew generally looks like so:
|
||||
# "[last word of first line spelled backwards] [whole line ordered backwards
|
||||
# and spelled backwards] [first word of first line spelled backwards]
|
||||
# [end of line] [last word of second line] ... etc' "
|
||||
# adding punctuation marks, numbers and English text to visual text is
|
||||
# naturally also "visual" and from left to right.
|
||||
#
|
||||
# "Logical" directionality means the text is ordered "naturally" according to
|
||||
# the order it is read. It is the responsibility of the renderer to display
|
||||
# the text from right to left. A BIDI algorithm is used to place general
|
||||
# punctuation marks, numbers and English text in the text.
|
||||
#
|
||||
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
|
||||
# what little evidence I could find, it seems that its general directionality
|
||||
# is Logical.
|
||||
#
|
||||
# To sum up all of the above, the Hebrew probing mechanism knows about two
|
||||
# charsets:
|
||||
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
|
||||
# backwards while line order is natural. For charset recognition purposes
|
||||
# the line order is unimportant (In fact, for this implementation, even
|
||||
# word order is unimportant).
|
||||
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
|
||||
#
|
||||
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
|
||||
# specifically identified.
|
||||
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
|
||||
# that contains special punctuation marks or diacritics is displayed with
|
||||
# some unconverted characters showing as question marks. This problem might
|
||||
# be corrected using another model prober for x-mac-hebrew. Due to the fact
|
||||
# that x-mac-hebrew texts are so rare, writing another model prober isn't
|
||||
# worth the effort and performance hit.
|
||||
#
|
||||
#### The Prober ####
|
||||
#
|
||||
# The prober is divided between two SBCharSetProbers and a HebrewProber,
|
||||
# all of which are managed, created, fed data, inquired and deleted by the
|
||||
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
|
||||
# fact some kind of Hebrew, Logical or Visual. The final decision about which
|
||||
# one it is is made by the HebrewProber by combining final-letter scores
|
||||
# with the scores of the two SBCharSetProbers to produce a final answer.
|
||||
#
|
||||
# The SBCSGroupProber is responsible for stripping the original text of HTML
|
||||
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
|
||||
# and new lines. It reduces any sequence of such characters to a single space.
|
||||
# The buffer fed to each prober in the SBCS group prober is pure text in
|
||||
# high-ASCII.
|
||||
# The two SBCharSetProbers (model probers) share the same language model:
|
||||
# Win1255Model.
|
||||
# The first SBCharSetProber uses the model normally as any other
|
||||
# SBCharSetProber does, to recognize windows-1255, upon which this model was
|
||||
# built. The second SBCharSetProber is told to make the pair-of-letter
|
||||
# lookup in the language model backwards. This in practice exactly simulates
|
||||
# a visual Hebrew model using the windows-1255 logical Hebrew model.
|
||||
#
|
||||
# The HebrewProber is not using any language model. All it does is look for
|
||||
# final-letter evidence suggesting the text is either logical Hebrew or visual
|
||||
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
|
||||
# alone are meaningless. HebrewProber always returns 0.00 as confidence
|
||||
# since it never identifies a charset by itself. Instead, the pointer to the
|
||||
# HebrewProber is passed to the model probers as a helper "Name Prober".
|
||||
# When the Group prober receives a positive identification from any prober,
|
||||
# it asks for the name of the charset identified. If the prober queried is a
|
||||
# Hebrew model prober, the model prober forwards the call to the
|
||||
# HebrewProber to make the final decision. In the HebrewProber, the
|
||||
# decision is made according to the final-letters scores maintained and Both
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
class HebrewProber(CharSetProber):
    """Helper prober that decides between logical and visual Hebrew.

    This prober uses no language model. It counts word-boundary evidence
    based on Hebrew final/non-final letter forms and, combined with the
    confidences of the two model probers registered through
    ``set_model_probers()``, picks the charset name to report:
    ``"windows-1255"`` (logical) or ``"ISO-8859-8"`` (visual).

    ``set_model_probers()`` must be called before ``feed()``, ``state`` or
    ``charset_name`` are used, because they read the model probers.
    """

    # Word delimiter as a byte value. Iterating a bytes/bytearray buffer on
    # Python 3 yields ints, so the delimiter must be an int as well;
    # comparing against the str ' ' would never match.
    SPACE = 0x20

    # windows-1255 / ISO-8859-8 code points of interest
    FINAL_KAF = 0xea
    NORMAL_KAF = 0xeb
    FINAL_MEM = 0xed
    NORMAL_MEM = 0xee
    FINAL_NUN = 0xef
    NORMAL_NUN = 0xf0
    FINAL_PE = 0xf3
    NORMAL_PE = 0xf4
    FINAL_TSADI = 0xf5
    NORMAL_TSADI = 0xf6

    # Minimum Visual vs Logical final letter score difference.
    # If the difference is below this, don't rely solely on the final letter
    # score distance.
    MIN_FINAL_CHAR_DISTANCE = 5

    # Minimum Visual vs Logical model score difference.
    # If the difference is below this, don't rely at all on the model score
    # distance.
    MIN_MODEL_DISTANCE = 0.01

    VISUAL_HEBREW_NAME = "ISO-8859-8"
    LOGICAL_HEBREW_NAME = "windows-1255"

    def __init__(self):
        super(HebrewProber, self).__init__()
        self._final_char_logical_score = None
        self._final_char_visual_score = None
        self._prev = None
        self._before_prev = None
        self._logical_prober = None
        self._visual_prober = None
        self.reset()

    def reset(self):
        """Clear the accumulated scores and the two-byte look-behind window."""
        self._final_char_logical_score = 0
        self._final_char_visual_score = 0
        # The two last bytes seen in the previous buffer. Both are
        # initialized to a space in order to simulate a word delimiter at
        # the beginning of the data.
        self._prev = self.SPACE
        self._before_prev = self.SPACE
        # The model probers are owned by the group prober, so they are
        # deliberately left untouched here.

    def set_model_probers(self, logicalProber, visualProber):
        """Register the logical and visual model probers used for decisions."""
        self._logical_prober = logicalProber
        self._visual_prober = visualProber

    def is_final(self, c):
        """Return True if byte value ``c`` is a Hebrew final-form letter."""
        return c in (self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
                     self.FINAL_PE, self.FINAL_TSADI)

    def is_non_final(self, c):
        """Return True if ``c`` is a non-final letter usable as evidence."""
        # The normal Tsadi is not a good Non-Final letter due to words like
        # 'lechotet' (to chat) containing an apostrophe after the tsadi. This
        # apostrophe is converted to a space in FilterWithoutEnglishLetters
        # causing the Non-Final tsadi to appear at an end of a word even
        # though this is not the case in the original text.
        # The letters Pe and Kaf rarely display a related behavior of not
        # being a good Non-Final letter. Words like 'Pop', 'Winamp' and
        # 'Mubarak' for example legally end with a Non-Final Pe or Kaf.
        # However, the benefit of these letters as Non-Final letters
        # outweighs the damage since these words are quite rare.
        return c in (self.NORMAL_KAF, self.NORMAL_MEM,
                     self.NORMAL_NUN, self.NORMAL_PE)

    def feed(self, byte_str):
        """Accumulate final-letter evidence from ``byte_str``.

        Returns ``ProbingState.NOT_ME`` when both model probers have given
        up, otherwise ``ProbingState.DETECTING``.
        """
        # Final letter analysis for logical-visual decision.
        # Look for evidence that the received buffer is either logical Hebrew
        # or visual Hebrew.
        # The following cases are checked:
        # 1) A word longer than 1 letter, ending with a final letter. This is
        #    an indication that the text is laid out "naturally" since the
        #    final letter really appears at the end. +1 for logical score.
        # 2) A word longer than 1 letter, ending with a Non-Final letter. In
        #    normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
        #    should not end with the Non-Final form of that letter.
        #    Exceptions to this rule are mentioned in is_non_final(). This is
        #    an indication that the text is laid out backwards. +1 for visual
        #    score.
        # 3) A word longer than 1 letter, starting with a final letter. Final
        #    letters should not appear at the beginning of a word. This is an
        #    indication that the text is laid out backwards. +1 for visual
        #    score.
        #
        # The visual score and logical score are accumulated throughout the
        # text and are finally checked against each other in charset_name.
        # No checking for final letters in the middle of words is done since
        # that case is not an indication for either Logical or Visual text.
        #
        # We automatically filter out all 7-bit characters (replace them with
        # spaces) so the word boundary detection works properly. [MAP]

        if self.state == ProbingState.NOT_ME:
            # Both model probers say it's not them. No reason to continue.
            return ProbingState.NOT_ME

        # filter_high_byte_only is not defined in this class; presumably it
        # is inherited from CharSetProber.
        byte_str = self.filter_high_byte_only(byte_str)

        for cur in byte_str:
            if cur == self.SPACE:
                # We stand on a space - a word just ended
                if self._before_prev != self.SPACE:
                    # next-to-last byte was not a space so self._prev is not
                    # a 1 letter word
                    if self.is_final(self._prev):
                        # case (1) [-2:not space][-1:final letter][cur:space]
                        self._final_char_logical_score += 1
                    elif self.is_non_final(self._prev):
                        # case (2) [-2:not space][-1:Non-Final letter]
                        #          [cur:space]
                        self._final_char_visual_score += 1
            elif (self._before_prev == self.SPACE
                    and self.is_final(self._prev)):
                # Not standing on a space:
                # case (3) [-2:space][-1:final letter][cur:not space]
                self._final_char_visual_score += 1
            self._before_prev = self._prev
            self._prev = cur

        # Forever detecting, till the end or until both model probers return
        # ProbingState.NOT_ME (handled above)
        return ProbingState.DETECTING

    @property
    def charset_name(self):
        """Name of the detected Hebrew charset (logical or visual)."""
        # Make the decision: is it Logical or Visual?
        # If the final letter score distance is dominant enough, rely on it.
        finalsub = (self._final_char_logical_score
                    - self._final_char_visual_score)
        if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # It's not dominant enough, try to rely on the model scores instead.
        modelsub = (self._logical_prober.get_confidence()
                    - self._visual_prober.get_confidence())
        if modelsub > self.MIN_MODEL_DISTANCE:
            return self.LOGICAL_HEBREW_NAME
        if modelsub < -self.MIN_MODEL_DISTANCE:
            return self.VISUAL_HEBREW_NAME

        # Still no good, back to final letter distance, maybe it'll save the
        # day.
        if finalsub < 0.0:
            return self.VISUAL_HEBREW_NAME

        # (finalsub > 0 - Logical) or (don't know what to do) default to
        # Logical.
        return self.LOGICAL_HEBREW_NAME

    @property
    def language(self):
        """Language handled by this prober."""
        return 'Hebrew'

    @property
    def state(self):
        """NOT_ME once both model probers are NOT_ME, otherwise DETECTING."""
        # Remain active as long as any of the model probers are active.
        if (self._logical_prober.state == ProbingState.NOT_ME) and \
                (self._visual_prober.state == ProbingState.NOT_ME):
            return ProbingState.NOT_ME
        return ProbingState.DETECTING
|
|
@ -0,0 +1,325 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampled from about 20M of text material, including literature and computer technology
|
||||
#
|
||||
# Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
# They are sorted in order.
|
||||
|
||||
# 128 --> 0.77094
|
||||
# 256 --> 0.85710
|
||||
# 512 --> 0.92635
|
||||
# 1024 --> 0.97130
|
||||
# 2048 --> 0.99431
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
# Random Distribution Ratio = 512 / (2965+62+83+86-512) = 0.191
|
||||
#
|
||||
# Typical Distribution Ratio, 25% of IDR
|
||||
|
||||
# Typical distribution ratio for Japanese text: roughly 25% of the ideal
# distribution ratio (12.58) derived in the comment above.
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||
|
||||
# Char to FreqOrder table
# JIS_TABLE_SIZE matches the running index comment on the last row of
# JIS_CHAR_TO_FREQ_ORDER (4368), i.e. the number of entries in that table.
|
||||
JIS_TABLE_SIZE = 4368
|
||||
|
||||
JIS_CHAR_TO_FREQ_ORDER = (
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||
)
|
||||
|
||||
|
|
@ -0,0 +1,233 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
# This is the hiragana 2-char sequence table; the number in each cell represents the frequency category of that character pair.
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
||||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
||||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
||||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
||||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
||||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
||||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
||||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
||||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
||||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
||||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
||||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
||||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
||||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
||||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
||||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
||||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
||||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
||||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
||||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
||||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
||||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
||||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
||||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
||||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
||||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
||||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
||||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
||||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
||||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
||||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
||||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
||||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
||||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
||||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
||||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
||||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
||||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
||||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
||||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
||||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
||||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
||||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
||||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
||||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
||||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
||||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
||||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
||||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
||||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
||||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
||||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
||||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
||||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
||||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
||||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
||||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
||||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
||||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
||||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
||||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
||||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
||||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
||||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
||||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
||||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
||||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis:
    """Accumulate statistics about consecutive character pairs.

    Each pair of consecutive characters for which ``get_order`` yields a
    valid order is looked up in ``jp2CharContext`` and counted under the
    category found there.  Subclasses override ``get_order`` for a concrete
    encoding; the implementation here never recognises any character.
    """

    NUM_OF_CATEGORY = 6
    DONT_KNOW = -1
    ENOUGH_REL_THRESHOLD = 100
    MAX_REL_THRESHOLD = 1000
    MINIMUM_DATA_THRESHOLD = 4

    def __init__(self):
        # Declare every attribute up front; reset() assigns the real
        # starting values.
        self._total_rel = None
        self._rel_sample = None
        self._need_to_skip_char_num = None
        self._last_char_order = None
        self._done = None
        self.reset()

    def reset(self):
        """Return the analyser to its initial, empty state."""
        # Number of character pairs counted so far.
        self._total_rel = 0
        # One counter per category; each counts the pairs seen in it.
        self._rel_sample = [0] * self.NUM_OF_CATEGORY
        # When a buffer ends in the middle of a character, this holds how
        # many bytes to skip at the start of the next buffer.
        self._need_to_skip_char_num = 0
        # Order of the previous character (-1 means "none / unknown").
        self._last_char_order = -1
        # Becomes True once enough pairs were seen to stop analysing.
        self._done = False

    def feed(self, byte_str, num_bytes):
        """Analyse the first ``num_bytes`` bytes of ``byte_str``.

        The buffer is byte oriented, so a character may be split across
        two buffers.  Rather than stitching such a character back together
        we remember how many bytes of it are still outstanding and skip
        them; dropping one character makes little difference and keeps the
        logic simple and fast.
        """
        if self._done:
            return

        pos = self._need_to_skip_char_num
        while pos < num_bytes:
            order, char_len = self.get_order(byte_str[pos:pos + 2])
            pos += char_len

            if pos > num_bytes:
                # The character runs past the end of this buffer: record
                # the overhang and forget the previous character.  The
                # loop condition is now false, so the loop ends.
                self._need_to_skip_char_num = pos - num_bytes
                self._last_char_order = -1
                continue

            previous = self._last_char_order
            if order != -1 and previous != -1:
                self._total_rel += 1
                if self._total_rel > self.MAX_REL_THRESHOLD:
                    self._done = True
                    break
                self._rel_sample[jp2CharContext[previous][order]] += 1
            self._last_char_order = order

    def got_enough_data(self):
        """Tell whether more than ENOUGH_REL_THRESHOLD pairs were counted."""
        return self._total_rel > self.ENOUGH_REL_THRESHOLD

    def get_confidence(self):
        """Return the share of counted pairs that fall outside category 0.

        ``DONT_KNOW`` is returned while no more than
        MINIMUM_DATA_THRESHOLD pairs have been counted.
        """
        total = self._total_rel
        if total <= self.MINIMUM_DATA_THRESHOLD:
            return self.DONT_KNOW
        return (total - self._rel_sample[0]) / total

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character at ``byte_str[0]``.

        This base implementation always reports an unknown order (-1) and
        a length of one byte.
        """
        return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
    """Context analysis for Shift_JIS encoded text.

    Besides reporting hiragana orders, ``get_order`` switches the reported
    charset name to "CP932" as soon as it sees one of the lead bytes
    0x87 or 0xFA-0xFC.
    """

    def __init__(self):
        super().__init__()
        self._charset_name = "SHIFT_JIS"

    @property
    def charset_name(self):
        """Charset name to report: "SHIFT_JIS", or "CP932" once detected."""
        return self._charset_name

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character at ``byte_str[0]``.

        ``order`` is the offset of the trail byte from 0x9F when the
        character is a hiragana (lead byte 0x82, trail byte 0x9F-0xF1),
        otherwise -1.  ``char_len`` is 2 for lead bytes in 0x81-0x9F or
        0xE0-0xFC and 1 for every other byte.  An empty ``byte_str``
        yields ``(-1, 1)``.
        """
        if not byte_str:
            return -1, 1

        # Find out the current character's byte length.
        first_char = byte_str[0]
        if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
            char_len = 2
            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
                self._charset_name = "CP932"
        else:
            char_len = 1

        # Return its order if it is hiragana.  Shift_JIS hiragana use the
        # lead byte 0x82 (octal 0202).  The previous comparison against
        # decimal 202 (0xCA) could never match a hiragana, so no character
        # pair was ever counted for Shift_JIS input.
        if len(byte_str) > 1:
            second_char = byte_str[1]
            if (first_char == 0x82) and (0x9F <= second_char <= 0xF1):
                return second_char - 0x9F, char_len

        return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
    """Context analysis for EUC-JP encoded text."""

    def get_order(self, byte_str):
        """Return ``(order, char_len)`` for the character at ``byte_str[0]``.

        ``order`` is the offset of the second byte from 0xA1 when the
        character is a hiragana (first byte 0xA4, second byte 0xA1-0xF3),
        otherwise -1.  ``char_len`` is 3 for the lead byte 0x8F, 2 for
        0x8E or 0xA1-0xFE, and 1 for anything else.  An empty ``byte_str``
        yields ``(-1, 1)``.
        """
        if not byte_str:
            return -1, 1

        # Work out how many bytes the current character occupies.
        lead = byte_str[0]
        if lead == 0x8F:
            char_len = 3
        elif lead == 0x8E or 0xA1 <= lead <= 0xFE:
            char_len = 2
        else:
            char_len = 1

        # Only hiragana get an order; everything else is reported as -1.
        if len(byte_str) > 1 and lead == 0xA4:
            trail = byte_str[1]
            if 0xA1 <= trail <= 0xF3:
                return trail - 0xA1, char_len

        return -1, char_len
|
||||
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually do not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
# this table is modified based on win1251BulgarianCharToOrderMap, so
|
||||
# only numbers < 64 are sure to be valid
|
||||
|
||||
Latin5_BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80
|
||||
210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90
|
||||
81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0
|
||||
62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0
|
||||
)
|
||||
|
||||
win1251BulgarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40
|
||||
110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50
|
||||
253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60
|
||||
116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70
|
||||
206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80
|
||||
221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90
|
||||
88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0
|
||||
73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0
|
||||
31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0
|
||||
39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0
|
||||
1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0
|
||||
7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 96.9392%
|
||||
# first 1024 sequences:3.0618%
|
||||
# rest sequences: 0.2992%
|
||||
# negative sequences: 0.0020%
|
||||
BulgarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
|
||||
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
|
||||
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
|
||||
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
|
||||
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
|
||||
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
|
||||
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
|
||||
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
|
||||
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
|
||||
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
|
||||
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
|
||||
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
|
||||
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
|
||||
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
|
||||
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
|
||||
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
|
||||
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
|
||||
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
|
||||
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
|
||||
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
|
||||
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
|
||||
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
|
||||
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
|
||||
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
|
||||
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
|
||||
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
|
||||
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
|
||||
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
|
||||
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
|
||||
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
|
||||
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
|
||||
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
|
||||
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
|
||||
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
|
||||
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
|
||||
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
|
||||
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
|
||||
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
)
|
||||
|
||||
# Model description for Bulgarian text in ISO-8859-5: pairs the Latin-5
# byte-to-order map with the Bulgarian sequence table.
Latin5BulgarianModel = {
    'char_to_order_map': Latin5_BulgarianCharToOrderMap,
    'precedence_matrix': BulgarianLangModel,
    # Matches the "first 512 sequences" share noted above the model table.
    'typical_positive_ratio': 0.969392,
    'keep_english_letter': False,
    'charset_name': "ISO-8859-5",
    # Was misspelled 'Bulgairan'; now agrees with Win1251BulgarianModel.
    'language': 'Bulgarian',
}
|
||||
|
||||
# Model description for Bulgarian text in windows-1251: pairs the
# windows-1251 byte-to-order map with the Bulgarian sequence table.
Win1251BulgarianModel = {
    "char_to_order_map": win1251BulgarianCharToOrderMap,
    "precedence_matrix": BulgarianLangModel,
    # Matches the "first 512 sequences" share noted above the model table.
    "typical_positive_ratio": 0.969392,
    "keep_english_letter": False,
    "charset_name": "windows-1251",
    "language": "Bulgarian",
}
|
|
@ -0,0 +1,333 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# KOI8-R language model
|
||||
# Character Mapping Table:
|
||||
KOI8R_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90
|
||||
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0
|
||||
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0
|
||||
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0
|
||||
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0
|
||||
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0
|
||||
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
|
||||
)
|
||||
|
||||
win1251_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
)
|
||||
|
||||
latin5_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
macCyrillic_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
|
||||
)
|
||||
|
||||
IBM855_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205,
|
||||
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70,
|
||||
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219,
|
||||
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229,
|
||||
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243,
|
||||
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248,
|
||||
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249,
|
||||
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
|
||||
)
|
||||
|
||||
IBM866_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40
|
||||
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50
|
||||
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60
|
||||
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70
|
||||
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35,
|
||||
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43,
|
||||
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15,
|
||||
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,
|
||||
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,
|
||||
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,
|
||||
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
|
||||
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 97.6601%
|
||||
# first 1024 sequences: 2.3389%
|
||||
# rest sequences: 0.1237%
|
||||
# negative sequences: 0.0009%
|
||||
RussianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
|
||||
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
|
||||
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
|
||||
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
|
||||
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
|
||||
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
|
||||
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
|
||||
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
|
||||
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
|
||||
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
|
||||
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
|
||||
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
|
||||
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
|
||||
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
|
||||
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
|
||||
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
|
||||
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
|
||||
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
|
||||
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
|
||||
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
|
||||
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
|
||||
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
|
||||
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
|
||||
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
|
||||
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
|
||||
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
|
||||
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
|
||||
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
|
||||
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
|
||||
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
|
||||
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
|
||||
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
|
||||
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
|
||||
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
|
||||
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
|
||||
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
|
||||
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
|
||||
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
)
|
||||
|
||||
# Model record for Russian text encoded as KOI8-R: pairs the KOI8-R
# byte-to-order table with the shared Russian sequence table.
Koi8rModel = dict(
    char_to_order_map=KOI8R_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="KOI8-R",
    language='Russian',
)
|
||||
|
||||
# Model record for Russian text encoded as windows-1251: pairs the
# windows-1251 byte-to-order table with the shared Russian sequence table.
Win1251CyrillicModel = dict(
    char_to_order_map=win1251_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="windows-1251",
    language='Russian',
)
|
||||
|
||||
# Model record for Russian text encoded as ISO-8859-5: pairs the
# ISO-8859-5 byte-to-order table with the shared Russian sequence table.
Latin5CyrillicModel = dict(
    char_to_order_map=latin5_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="ISO-8859-5",
    language='Russian',
)
|
||||
|
||||
# Model record for Russian text encoded as MacCyrillic: pairs the
# MacCyrillic byte-to-order table with the shared Russian sequence table.
MacCyrillicModel = dict(
    char_to_order_map=macCyrillic_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="MacCyrillic",
    language='Russian',
)
|
||||
|
||||
# Model record for Russian text encoded as IBM866: pairs the IBM866
# byte-to-order table with the shared Russian sequence table.
Ibm866Model = dict(
    char_to_order_map=IBM866_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="IBM866",
    language='Russian',
)
|
||||
|
||||
# Model record for Russian text encoded as IBM855: pairs the IBM855
# byte-to-order table with the shared Russian sequence table.
Ibm855Model = dict(
    char_to_order_map=IBM855_char_to_order_map,
    precedence_matrix=RussianLangModel,
    # Same figure as the "first 512 sequences: 97.6601%" model-table note.
    typical_positive_ratio=0.976601,
    keep_english_letter=False,
    charset_name="IBM855",
    language='Russian',
)
|
|
@ -0,0 +1,225 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually do not exist in any text
|
||||
# 254: Carriage return / line feed
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin7_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
win1253_char_to_order_map = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50
|
||||
253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90
|
||||
253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0
|
||||
253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0
|
||||
35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.2851%
|
||||
# first 1024 sequences: 1.7001%
|
||||
# rest sequences: 0.0359%
|
||||
# negative sequences: 0.0148%
|
||||
GreekLangModel = (
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
||||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
||||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
||||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
||||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
||||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
||||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
||||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
||||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
||||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
||||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
||||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
||||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
||||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
||||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
# Model record for Greek text encoded as ISO-8859-7: pairs the Latin7
# byte-to-order table with the shared Greek sequence table.
Latin7GreekModel = dict(
    char_to_order_map=Latin7_char_to_order_map,
    precedence_matrix=GreekLangModel,
    # Same figure as the "first 512 sequences: 98.2851%" model-table note.
    typical_positive_ratio=0.982851,
    keep_english_letter=False,
    charset_name="ISO-8859-7",
    language='Greek',
)
|
||||
|
||||
# Model record for Greek text encoded as windows-1253: pairs the
# windows-1253 byte-to-order table with the shared Greek sequence table.
Win1253GreekModel = dict(
    char_to_order_map=win1253_char_to_order_map,
    precedence_matrix=GreekLangModel,
    # Same figure as the "first 512 sequences: 98.2851%" model-table note.
    typical_positive_ratio=0.982851,
    keep_english_letter=False,
    charset_name="windows-1253",
    language='Greek',
)
|
|
@ -0,0 +1,200 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Simon Montagu
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Shoshannah Forbes - original C code (?)
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Windows-1255 language model
|
||||
# Character Mapping Table:
|
||||
WIN1255_CHAR_TO_ORDER_MAP = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40
|
||||
78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50
|
||||
253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60
|
||||
66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70
|
||||
124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
|
||||
215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
|
||||
34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
|
||||
106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
|
||||
30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
|
||||
238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
|
||||
9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
|
||||
12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 98.4004%
|
||||
# first 1024 sequences: 1.5981%
|
||||
# rest sequences: 0.087%
|
||||
# negative sequences: 0.0015%
|
||||
HEBREW_LANG_MODEL = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
|
||||
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
|
||||
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
|
||||
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
|
||||
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
|
||||
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
|
||||
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
|
||||
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
|
||||
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
|
||||
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
|
||||
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
|
||||
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
|
||||
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
|
||||
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
|
||||
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
|
||||
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
|
||||
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
|
||||
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
|
||||
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
|
||||
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
|
||||
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
|
||||
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
|
||||
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
|
||||
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
|
||||
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
|
||||
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
|
||||
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
|
||||
)
|
||||
|
||||
Win1255HebrewModel = {
|
||||
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
|
||||
'precedence_matrix': HEBREW_LANG_MODEL,
|
||||
'typical_positive_ratio': 0.984004,
|
||||
'keep_english_letter': False,
|
||||
'charset_name': "windows-1255",
|
||||
'language': 'Hebrew',
|
||||
}
|
|
@ -0,0 +1,225 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually does not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin2_HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
|
||||
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
|
||||
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
|
||||
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
|
||||
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
win1250HungarianCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
|
||||
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
|
||||
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
|
||||
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
|
||||
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
|
||||
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
|
||||
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
|
||||
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
|
||||
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
|
||||
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
|
||||
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
|
||||
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 94.7368%
|
||||
# first 1024 sequences:5.2623%
|
||||
# rest sequences: 0.8894%
|
||||
# negative sequences: 0.0009%
|
||||
HungarianLangModel = (
|
||||
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
|
||||
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
|
||||
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
|
||||
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
|
||||
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
|
||||
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
|
||||
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
|
||||
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
|
||||
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
|
||||
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
|
||||
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
|
||||
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
|
||||
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
|
||||
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
|
||||
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
|
||||
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
|
||||
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
|
||||
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
|
||||
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
|
||||
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
|
||||
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
|
||||
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
|
||||
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
|
||||
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
|
||||
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
|
||||
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
|
||||
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
|
||||
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
|
||||
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
|
||||
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
|
||||
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
|
||||
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
|
||||
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
|
||||
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
|
||||
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
|
||||
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
|
||||
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
|
||||
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
|
||||
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
|
||||
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
|
||||
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
|
||||
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
|
||||
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
|
||||
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
Latin2HungarianModel = {
|
||||
'char_to_order_map': Latin2_HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "ISO-8859-2",
|
||||
'language': 'Hungarian',
|
||||
}
|
||||
|
||||
Win1250HungarianModel = {
|
||||
'char_to_order_map': win1250HungarianCharToOrderMap,
|
||||
'precedence_matrix': HungarianLangModel,
|
||||
'typical_positive_ratio': 0.947368,
|
||||
'keep_english_letter': True,
|
||||
'charset_name': "windows-1250",
|
||||
'language': 'Hungarian',
|
||||
}
|
|
@ -0,0 +1,199 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually do not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# The following result for Thai was collected from a limited sample (1M).
|
||||
|
||||
# Character Mapping Table:
|
||||
TIS620CharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
|
||||
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
|
||||
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
|
||||
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40
|
||||
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50
|
||||
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60
|
||||
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70
|
||||
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
|
||||
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
|
||||
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
|
||||
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
|
||||
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
|
||||
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
|
||||
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
|
||||
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
|
||||
)
|
||||
|
||||
# Model Table:
|
||||
# total sequences: 100%
|
||||
# first 512 sequences: 92.6386%
|
||||
# first 1024 sequences: 7.3177%
|
||||
# rest sequences: 1.0230%
|
||||
# negative sequences: 0.0436%
|
||||
ThaiLangModel = (
|
||||
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
|
||||
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
|
||||
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
|
||||
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
|
||||
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
|
||||
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
|
||||
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
|
||||
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
|
||||
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
|
||||
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
|
||||
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
|
||||
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
|
||||
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
|
||||
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
|
||||
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
|
||||
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
|
||||
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
|
||||
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
|
||||
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
|
||||
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
|
||||
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
|
||||
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
|
||||
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
|
||||
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
|
||||
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
|
||||
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
|
||||
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
|
||||
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
|
||||
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
|
||||
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
|
||||
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
|
||||
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
|
||||
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
|
||||
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
|
||||
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
|
||||
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
|
||||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
|
||||
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
|
||||
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
|
||||
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
|
||||
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
|
||||
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
|
||||
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
|
||||
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
|
||||
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
|
||||
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
|
||||
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
|
||||
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
|
||||
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
|
||||
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
|
||||
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
|
||||
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
|
||||
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
|
||||
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
# Single-byte language model for Thai text encoded as TIS-620.
# English letters are not kept for this model (keep_english_letter is False).
TIS620ThaiModel = {
    'char_to_order_map': TIS620CharToOrderMap,
    'precedence_matrix': ThaiLangModel,
    'typical_positive_ratio': 0.926386,
    'keep_english_letter': False,
    'charset_name': "TIS-620",
    'language': 'Thai',
}
|
|
@ -0,0 +1,193 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Özgür Baskın - Turkish Language Model
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# 255: Control characters that usually do not exist in any text
|
||||
# 254: Carriage/Return
|
||||
# 253: symbol (punctuation) that does not belong to word
|
||||
# 252: 0 - 9
|
||||
|
||||
# Character Mapping Table:
|
||||
Latin5_TurkishCharToOrderMap = (
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
|
||||
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
|
||||
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
|
||||
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
|
||||
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
|
||||
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
|
||||
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
|
||||
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
|
||||
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
|
||||
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
|
||||
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
|
||||
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
|
||||
)
|
||||
|
||||
TurkishLangModel = (
|
||||
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
|
||||
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
|
||||
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
|
||||
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
|
||||
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
|
||||
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
|
||||
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
|
||||
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
|
||||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
|
||||
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
|
||||
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
|
||||
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
|
||||
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
|
||||
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
|
||||
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
|
||||
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
|
||||
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
|
||||
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
|
||||
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
|
||||
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
|
||||
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
|
||||
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
|
||||
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
|
||||
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
|
||||
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
|
||||
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
|
||||
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
|
||||
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
|
||||
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
|
||||
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
|
||||
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
|
||||
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
|
||||
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
|
||||
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
|
||||
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
|
||||
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
|
||||
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
|
||||
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
|
||||
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
|
||||
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
|
||||
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
|
||||
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
|
||||
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
|
||||
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
|
||||
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
|
||||
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
|
||||
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
|
||||
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
|
||||
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
|
||||
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
|
||||
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
|
||||
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
|
||||
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
|
||||
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
|
||||
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
|
||||
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
|
||||
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
|
||||
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
|
||||
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
|
||||
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
|
||||
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
|
||||
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
|
||||
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
|
||||
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
|
||||
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
|
||||
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
)
|
||||
|
||||
# Single-byte language model for Turkish text encoded as ISO-8859-9.
# Pairs the Latin-5 character-to-order table with the Turkish
# precedence matrix defined above.
Latin5TurkishModel = {
    'char_to_order_map': Latin5_TurkishCharToOrderMap,
    'precedence_matrix': TurkishLangModel,
    'typical_positive_ratio': 0.970290,
    'keep_english_letter': True,
    'charset_name': "ISO-8859-9",
    'language': 'Turkish',
}
|
|
@ -0,0 +1,145 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
# Number of likelihood categories stored in Latin1ClassModel (values 0-3);
# the prober keeps one counter per category.
FREQ_CAT_NUM = 4

# Character classes a byte can be assigned to.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

# Class of every byte value, indexed by the byte itself (0x00 - 0xFF).
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
)

# Likelihood of a character class (column) directly following another
# character class (row).  The 8x8 matrix is stored flat, row by row, so the
# entry for a pair is at index ``previous_class * CLASS_NUM + current_class``.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
    """Prober that scores input as ISO-8859-1 text.

    Every byte is mapped to a character class via ``Latin1_CharToClass``;
    the likelihood of each (previous class, current class) pair is looked
    up in ``Latin1ClassModel`` and tallied per likelihood category.
    """

    def __init__(self):
        super().__init__()
        # Both attributes receive their real initial values in reset().
        self._last_char_class = None
        self._freq_counter = None
        self.reset()

    def reset(self):
        """Clear the class-transition statistics and reset the base prober."""
        self._last_char_class = OTH
        self._freq_counter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    @property
    def charset_name(self):
        """Name of the charset this prober reports."""
        return "ISO-8859-1"

    @property
    def language(self):
        """This prober is not tied to a language, so the name is empty."""
        return ""

    def feed(self, byte_str):
        """Consume a chunk of bytes and return the current probing state.

        The chunk is first passed through ``filter_with_english_letters``
        (not defined in this class; presumably inherited from
        ``CharSetProber``).  A class transition the model marks as illegal
        (likelihood 0) sets the state to ``NOT_ME`` and stops processing the
        rest of the chunk.
        """
        filtered = self.filter_with_english_letters(byte_str)
        for byte in filtered:
            current_class = Latin1_CharToClass[byte]
            model_index = self._last_char_class * CLASS_NUM + current_class
            likelihood = Latin1ClassModel[model_index]
            if likelihood == 0:
                self._state = ProbingState.NOT_ME
                break
            self._freq_counter[likelihood] += 1
            self._last_char_class = current_class

        return self.state

    def get_confidence(self):
        """Return the confidence that the data seen so far is ISO-8859-1.

        Returns 0.01 once the prober has ruled itself out.  Otherwise the
        score is the count of "very likely" transitions minus twenty times
        the count of "very unlikely" ones, divided by the number of
        transitions seen, clamped at zero and scaled by 0.73.
        """
        if self.state == ProbingState.NOT_ME:
            return 0.01

        seen = sum(self._freq_counter)
        if seen < 0.01:
            # Nothing counted yet: no evidence in either direction.
            return 0.0

        very_likely = self._freq_counter[3]
        very_unlikely = self._freq_counter[1]
        score = (very_likely - very_unlikely * 20.0) / seen
        score = 0.0 if score < 0.0 else score
        # Lower the confidence of latin1 so that other, more accurate
        # detectors can take priority.
        return score * 0.73
|
|
@ -0,0 +1,91 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
    """Shared feed/reset logic for probers of multi-byte encodings.

    ``coding_sm`` and ``distribution_analyzer`` both start as ``None``;
    ``feed`` and ``get_confidence`` use them unconditionally, so they must be
    assigned before the prober is fed. ``charset_name`` and ``language``
    raise ``NotImplementedError`` here and have to be overridden.
    """

    def __init__(self, lang_filter=None):
        """Initialise the prober, forwarding *lang_filter* to the base class."""
        super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
        self.distribution_analyzer = None
        self.coding_sm = None
        # Two-slot buffer used to hand the analyzer a byte pair that
        # straddles two consecutive feed() calls.
        self._last_char = [0, 0]

    def reset(self):
        """Reset the base prober, both helpers (when set) and the byte buffer."""
        super(MultiByteCharSetProber, self).reset()
        if self.coding_sm:
            self.coding_sm.reset()
        if self.distribution_analyzer:
            self.distribution_analyzer.reset()
        self._last_char = [0, 0]

    @property
    def charset_name(self):
        """Name of the detected charset; must be provided by a subclass."""
        raise NotImplementedError

    @property
    def language(self):
        """Language associated with the charset; must be provided by a subclass."""
        raise NotImplementedError

    def feed(self, byte_str):
        """Run *byte_str* through the coding state machine and the analyzer.

        Each byte is passed to ``coding_sm.next_state``:

        * ``MachineState.ERROR`` marks the prober ``NOT_ME`` and stops.
        * ``MachineState.ITS_ME`` marks the prober ``FOUND_IT`` and stops.
        * ``MachineState.START`` forwards the current and previous byte,
          together with ``coding_sm.get_current_charlen()``, to the
          distribution analyzer.

        An empty *byte_str* is accepted and leaves the carried-over byte
        untouched (previously it raised ``IndexError``).

        Returns the prober state after processing.
        """
        for i, byte in enumerate(byte_str):
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The preceding byte belongs to the previous chunk; it
                    # was saved in slot 0 at the end of the last call.
                    self._last_char[1] = byte
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next call; with an empty chunk
        # there is nothing to remember.
        if byte_str:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.distribution_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the confidence reported by the distribution analyzer."""
        return self.distribution_analyzer.get_confidence()
|
|
@ -0,0 +1,54 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .utf8prober import UTF8Prober
|
||||
from .sjisprober import SJISProber
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .gb2312prober import GB2312Prober
|
||||
from .euckrprober import EUCKRProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .big5prober import Big5Prober
|
||||
from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that owns one instance of each multi-byte prober."""

    def __init__(self, lang_filter=None):
        """Create the member probers, then reset the group.

        *lang_filter* is forwarded to the base class only; the member
        probers are constructed without arguments.
        """
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # Instantiation order is the order the probers appear in self.probers.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self.probers = [prober_cls() for prober_cls in prober_classes]
        self.reset()
|
|
@ -0,0 +1,572 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
# Byte-class table for Big5: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
BIG5_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07    (0x00 is allowed as a legal value)
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
    4,4,4,4,4,4,4,4,  # 80 - 87
    4,4,4,4,4,4,4,4,  # 88 - 8f
    4,4,4,4,4,4,4,4,  # 90 - 97
    4,4,4,4,4,4,4,4,  # 98 - 9f
    4,3,3,3,3,3,3,3,  # a0 - a7
    3,3,3,3,3,3,3,3,  # a8 - af
    3,3,3,3,3,3,3,3,  # b0 - b7
    3,3,3,3,3,3,3,3,  # b8 - bf
    3,3,3,3,3,3,3,3,  # c0 - c7
    3,3,3,3,3,3,3,3,  # c8 - cf
    3,3,3,3,3,3,3,3,  # d0 - d7
    3,3,3,3,3,3,3,3,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,3,3,3,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,3,3,0  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
BIG5_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)

# Bundle of the tables above, keyed for the state machine that uses them.
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
                 'class_factor': 5,
                 'state_table': BIG5_ST,
                 'char_len_table': BIG5_CHAR_LEN_TABLE,
                 'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
# Byte-class table for CP949: 256 entries, one per byte value 0x00-0xff,
# laid out 16 per row (the row comment gives the byte range covered).
CP949_CLS = (
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0,  # 00 - 0f
    1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1,  # 10 - 1f
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 20 - 2f
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,  # 30 - 3f
    1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4,  # 40 - 4f
    4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 50 - 5f
    1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,  # 60 - 6f
    5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1,  # 70 - 7f
    0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 80 - 8f
    6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,  # 90 - 9f
    6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8,  # a0 - af
    7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,  # b0 - bf
    7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2,  # c0 - cf
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # d0 - df
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,  # e0 - ef
    2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0,  # f0 - ff
)

# Transition table, flat tuple with one row per previous state and one
# column per byte class (10 classes, matching 'class_factor' below).
# Bare integers 3-6 name the rows labelled "# 3" .. "# 6".
CP949_ST = (
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
    MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

# Bundle of the tables above, keyed for the state machine that uses them.
CP949_SM_MODEL = {'class_table': CP949_CLS,
                  'class_factor': 10,
                  'state_table': CP949_ST,
                  'char_len_table': CP949_CHAR_LEN_TABLE,
                  'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
# Byte-class table for EUC-JP: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
EUCJP_CLS = (
    4,4,4,4,4,4,4,4,  # 00 - 07
    4,4,4,4,4,4,5,5,  # 08 - 0f
    4,4,4,4,4,4,4,4,  # 10 - 17
    4,4,4,5,4,4,4,4,  # 18 - 1f
    4,4,4,4,4,4,4,4,  # 20 - 27
    4,4,4,4,4,4,4,4,  # 28 - 2f
    4,4,4,4,4,4,4,4,  # 30 - 37
    4,4,4,4,4,4,4,4,  # 38 - 3f
    4,4,4,4,4,4,4,4,  # 40 - 47
    4,4,4,4,4,4,4,4,  # 48 - 4f
    4,4,4,4,4,4,4,4,  # 50 - 57
    4,4,4,4,4,4,4,4,  # 58 - 5f
    4,4,4,4,4,4,4,4,  # 60 - 67
    4,4,4,4,4,4,4,4,  # 68 - 6f
    4,4,4,4,4,4,4,4,  # 70 - 77
    4,4,4,4,4,4,4,4,  # 78 - 7f
    5,5,5,5,5,5,5,5,  # 80 - 87
    5,5,5,5,5,5,1,3,  # 88 - 8f
    5,5,5,5,5,5,5,5,  # 90 - 97
    5,5,5,5,5,5,5,5,  # 98 - 9f
    5,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,2,2,2,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,2,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,0,5  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
EUCJP_ST = (
    3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
    3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)

# Bundle of the tables above, keyed for the state machine that uses them.
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
                  'class_factor': 6,
                  'state_table': EUCJP_ST,
                  'char_len_table': EUCJP_CHAR_LEN_TABLE,
                  'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
# Byte-class table for EUC-KR: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
EUCKR_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    1,1,1,1,1,1,1,1,  # 40 - 47
    1,1,1,1,1,1,1,1,  # 48 - 4f
    1,1,1,1,1,1,1,1,  # 50 - 57
    1,1,1,1,1,1,1,1,  # 58 - 5f
    1,1,1,1,1,1,1,1,  # 60 - 67
    1,1,1,1,1,1,1,1,  # 68 - 6f
    1,1,1,1,1,1,1,1,  # 70 - 77
    1,1,1,1,1,1,1,1,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,3,3,3,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,3,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    2,2,2,2,2,2,2,2,  # e0 - e7
    2,2,2,2,2,2,2,2,  # e8 - ef
    2,2,2,2,2,2,2,2,  # f0 - f7
    2,2,2,2,2,2,2,0  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; the bare integer would then
# be a further state number -- confirm against the consumer.
EUCKR_ST = (
    MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)

# Bundle of the tables above, keyed for the state machine that uses them.
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
                  'class_factor': 4,
                  'state_table': EUCKR_ST,
                  'char_len_table': EUCKR_CHAR_LEN_TABLE,
                  'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
# Byte-class table for EUC-TW: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
EUCTW_CLS = (
    2,2,2,2,2,2,2,2,  # 00 - 07
    2,2,2,2,2,2,0,0,  # 08 - 0f
    2,2,2,2,2,2,2,2,  # 10 - 17
    2,2,2,0,2,2,2,2,  # 18 - 1f
    2,2,2,2,2,2,2,2,  # 20 - 27
    2,2,2,2,2,2,2,2,  # 28 - 2f
    2,2,2,2,2,2,2,2,  # 30 - 37
    2,2,2,2,2,2,2,2,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,2,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,6,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,3,4,4,4,4,4,4,  # a0 - a7
    5,5,1,1,1,1,1,1,  # a8 - af
    1,1,1,1,1,1,1,1,  # b0 - b7
    1,1,1,1,1,1,1,1,  # b8 - bf
    1,1,3,1,3,3,3,3,  # c0 - c7
    3,3,3,3,3,3,3,3,  # c8 - cf
    3,3,3,3,3,3,3,3,  # d0 - d7
    3,3,3,3,3,3,3,3,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,3,3,3,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,3,3,0  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
EUCTW_ST = (
    MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
    MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
    MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)

# Bundle of the tables above, keyed for the state machine that uses them.
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
                  'class_factor': 7,
                  'state_table': EUCTW_ST,
                  'char_len_table': EUCTW_CHAR_LEN_TABLE,
                  'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
# Byte-class table for GB2312: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
GB2312_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    3,3,3,3,3,3,3,3,  # 30 - 37
    3,3,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,4,  # 78 - 7f
    5,6,6,6,6,6,6,6,  # 80 - 87
    6,6,6,6,6,6,6,6,  # 88 - 8f
    6,6,6,6,6,6,6,6,  # 90 - 97
    6,6,6,6,6,6,6,6,  # 98 - 9f
    6,6,6,6,6,6,6,6,  # a0 - a7
    6,6,6,6,6,6,6,6,  # a8 - af
    6,6,6,6,6,6,6,6,  # b0 - b7
    6,6,6,6,6,6,6,6,  # b8 - bf
    6,6,6,6,6,6,6,6,  # c0 - c7
    6,6,6,6,6,6,6,6,  # c8 - cf
    6,6,6,6,6,6,6,6,  # d0 - d7
    6,6,6,6,6,6,6,6,  # d8 - df
    6,6,6,6,6,6,6,6,  # e0 - e7
    6,6,6,6,6,6,6,6,  # e8 - ef
    6,6,6,6,6,6,6,6,  # f0 - f7
    6,6,6,6,6,6,6,0  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
GB2312_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
    4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)

# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validating
# each code range there as well. So it is safe to set it to be
# 2 here.
# One entry per byte class (length equals 'class_factor' below).
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

# Bundle of the tables above, keyed for the state machine that uses them.
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
                   'class_factor': 7,
                   'state_table': GB2312_ST,
                   'char_len_table': GB2312_CHAR_LEN_TABLE,
                   'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
# Byte-class table for Shift_JIS: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
SJIS_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    2,2,2,2,2,2,2,2,  # 40 - 47
    2,2,2,2,2,2,2,2,  # 48 - 4f
    2,2,2,2,2,2,2,2,  # 50 - 57
    2,2,2,2,2,2,2,2,  # 58 - 5f
    2,2,2,2,2,2,2,2,  # 60 - 67
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
    3,3,3,3,3,2,2,3,  # 80 - 87
    3,3,3,3,3,3,3,3,  # 88 - 8f
    3,3,3,3,3,3,3,3,  # 90 - 97
    3,3,3,3,3,3,3,3,  # 98 - 9f
    # 0xa0 is illegal in the sjis encoding, but some pages do contain
    # this byte, so the table is deliberately forgiving here.
    2,2,2,2,2,2,2,2,  # a0 - a7
    2,2,2,2,2,2,2,2,  # a8 - af
    2,2,2,2,2,2,2,2,  # b0 - b7
    2,2,2,2,2,2,2,2,  # b8 - bf
    2,2,2,2,2,2,2,2,  # c0 - c7
    2,2,2,2,2,2,2,2,  # c8 - cf
    2,2,2,2,2,2,2,2,  # d0 - d7
    2,2,2,2,2,2,2,2,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,4,4,4,  # e8 - ef
    3,3,3,3,3,3,3,3,  # f0 - f7
    3,3,3,3,3,0,0,0)  # f8 - ff


# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; the bare integer would then
# be a further state number -- confirm against the consumer.
SJIS_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)

# Bundle of the tables above, keyed for the state machine that uses them.
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
                 'class_factor': 6,
                 'state_table': SJIS_ST,
                 'char_len_table': SJIS_CHAR_LEN_TABLE,
                 'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
# Byte-class table for UCS2-BE: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
UCS2BE_CLS = (
    0,0,0,0,0,0,0,0,  # 00 - 07
    0,0,1,0,0,2,0,0,  # 08 - 0f
    0,0,0,0,0,0,0,0,  # 10 - 17
    0,0,0,3,0,0,0,0,  # 18 - 1f
    0,0,0,0,0,0,0,0,  # 20 - 27
    0,3,3,3,3,3,0,0,  # 28 - 2f
    0,0,0,0,0,0,0,0,  # 30 - 37
    0,0,0,0,0,0,0,0,  # 38 - 3f
    0,0,0,0,0,0,0,0,  # 40 - 47
    0,0,0,0,0,0,0,0,  # 48 - 4f
    0,0,0,0,0,0,0,0,  # 50 - 57
    0,0,0,0,0,0,0,0,  # 58 - 5f
    0,0,0,0,0,0,0,0,  # 60 - 67
    0,0,0,0,0,0,0,0,  # 68 - 6f
    0,0,0,0,0,0,0,0,  # 70 - 77
    0,0,0,0,0,0,0,0,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,0,0,0,0,0,0,0,  # a0 - a7
    0,0,0,0,0,0,0,0,  # a8 - af
    0,0,0,0,0,0,0,0,  # b0 - b7
    0,0,0,0,0,0,0,0,  # b8 - bf
    0,0,0,0,0,0,0,0,  # c0 - c7
    0,0,0,0,0,0,0,0,  # c8 - cf
    0,0,0,0,0,0,0,0,  # d0 - d7
    0,0,0,0,0,0,0,0,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,4,5  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
UCS2BE_ST = (
    5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
    6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
    6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
    5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
    6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)

# Bundle of the tables above, keyed for the state machine that uses them.
# Note the reported name is 'UTF-16BE' rather than UCS2-BE.
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
                   'class_factor': 6,
                   'state_table': UCS2BE_ST,
                   'char_len_table': UCS2BE_CHAR_LEN_TABLE,
                   'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
# Byte-class table for UCS2-LE: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
UCS2LE_CLS = (
    0,0,0,0,0,0,0,0,  # 00 - 07
    0,0,1,0,0,2,0,0,  # 08 - 0f
    0,0,0,0,0,0,0,0,  # 10 - 17
    0,0,0,3,0,0,0,0,  # 18 - 1f
    0,0,0,0,0,0,0,0,  # 20 - 27
    0,3,3,3,3,3,0,0,  # 28 - 2f
    0,0,0,0,0,0,0,0,  # 30 - 37
    0,0,0,0,0,0,0,0,  # 38 - 3f
    0,0,0,0,0,0,0,0,  # 40 - 47
    0,0,0,0,0,0,0,0,  # 48 - 4f
    0,0,0,0,0,0,0,0,  # 50 - 57
    0,0,0,0,0,0,0,0,  # 58 - 5f
    0,0,0,0,0,0,0,0,  # 60 - 67
    0,0,0,0,0,0,0,0,  # 68 - 6f
    0,0,0,0,0,0,0,0,  # 70 - 77
    0,0,0,0,0,0,0,0,  # 78 - 7f
    0,0,0,0,0,0,0,0,  # 80 - 87
    0,0,0,0,0,0,0,0,  # 88 - 8f
    0,0,0,0,0,0,0,0,  # 90 - 97
    0,0,0,0,0,0,0,0,  # 98 - 9f
    0,0,0,0,0,0,0,0,  # a0 - a7
    0,0,0,0,0,0,0,0,  # a8 - af
    0,0,0,0,0,0,0,0,  # b0 - b7
    0,0,0,0,0,0,0,0,  # b8 - bf
    0,0,0,0,0,0,0,0,  # c0 - c7
    0,0,0,0,0,0,0,0,  # c8 - cf
    0,0,0,0,0,0,0,0,  # d0 - d7
    0,0,0,0,0,0,0,0,  # d8 - df
    0,0,0,0,0,0,0,0,  # e0 - e7
    0,0,0,0,0,0,0,0,  # e8 - ef
    0,0,0,0,0,0,0,0,  # f0 - f7
    0,0,0,0,0,0,4,5  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model; bare integers would then be
# further state numbers -- confirm against the consumer.
UCS2LE_ST = (
    6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
    7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
    5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)

# Bundle of the tables above, keyed for the state machine that uses them.
# Note the reported name is 'UTF-16LE' rather than UCS2-LE.
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
                   'class_factor': 6,
                   'state_table': UCS2LE_ST,
                   'char_len_table': UCS2LE_CHAR_LEN_TABLE,
                   'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
# Byte-class table for UTF-8: 256 entries, one per byte value 0x00-0xff.
# The comment on each row gives the byte range that row covers.
UTF8_CLS = (
    1,1,1,1,1,1,1,1,  # 00 - 07  (0x00 is allowed as a legal value)
    1,1,1,1,1,1,0,0,  # 08 - 0f
    1,1,1,1,1,1,1,1,  # 10 - 17
    1,1,1,0,1,1,1,1,  # 18 - 1f
    1,1,1,1,1,1,1,1,  # 20 - 27
    1,1,1,1,1,1,1,1,  # 28 - 2f
    1,1,1,1,1,1,1,1,  # 30 - 37
    1,1,1,1,1,1,1,1,  # 38 - 3f
    1,1,1,1,1,1,1,1,  # 40 - 47
    1,1,1,1,1,1,1,1,  # 48 - 4f
    1,1,1,1,1,1,1,1,  # 50 - 57
    1,1,1,1,1,1,1,1,  # 58 - 5f
    1,1,1,1,1,1,1,1,  # 60 - 67
    1,1,1,1,1,1,1,1,  # 68 - 6f
    1,1,1,1,1,1,1,1,  # 70 - 77
    1,1,1,1,1,1,1,1,  # 78 - 7f
    2,2,2,2,3,3,3,3,  # 80 - 87
    4,4,4,4,4,4,4,4,  # 88 - 8f
    4,4,4,4,4,4,4,4,  # 90 - 97
    4,4,4,4,4,4,4,4,  # 98 - 9f
    5,5,5,5,5,5,5,5,  # a0 - a7
    5,5,5,5,5,5,5,5,  # a8 - af
    5,5,5,5,5,5,5,5,  # b0 - b7
    5,5,5,5,5,5,5,5,  # b8 - bf
    0,0,6,6,6,6,6,6,  # c0 - c7
    6,6,6,6,6,6,6,6,  # c8 - cf
    6,6,6,6,6,6,6,6,  # d0 - d7
    6,6,6,6,6,6,6,6,  # d8 - df
    7,8,8,8,8,8,8,8,  # e0 - e7
    8,8,8,8,8,9,8,8,  # e8 - ef
    10,11,11,11,11,11,11,11,  # f0 - f7
    12,13,13,13,14,15,0,0  # f8 - ff
)

# Transition table stored as one flat tuple; the #NN-NN row comments are
# flat offsets (hex) into the tuple, not byte values.
# NOTE(review): presumably indexed as state * class_factor + byte class by
# the state machine that consumes this model (with class_factor 16, every
# two rows of 8 would form one state); bare integers would then be further
# state numbers -- confirm against the consumer.
UTF8_ST = (
    MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
    9, 11, 8, 7, 6, 5, 4, 3,#08-0f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
    MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
    MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
    MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)

# One entry per byte class (length equals 'class_factor' below).
# NOTE(review): presumably the character length in bytes for that class.
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Bundle of the tables above, keyed for the state machine that uses them.
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
                 'class_factor': 16,
                 'state_table': UTF8_ST,
                 'char_len_table': UTF8_CHAR_LEN_TABLE,
                 'name': 'UTF-8'}
|
|
@ -0,0 +1,132 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
    """Prober for one single-byte charset, driven by a language model dict.

    The model dict is read for the keys ``'charset_name'``, ``'language'``
    (optional), ``'keep_english_letter'``, ``'char_to_order_map'``,
    ``'precedence_matrix'`` and ``'typical_positive_ratio'``.
    """

    SAMPLE_SIZE = 64
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        """Store the model and options, then reset all counters.

        :param model: language model dict (see class docstring).
        :param reversed: when true, every character pair is looked up in
            the precedence matrix in swapped order.
        :param name_prober: optional auxiliary prober that, when set,
            supplies ``charset_name`` and ``language`` instead of the model.
        """
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        self._reversed = reversed
        self._name_prober = name_prober
        # Counters below get their real starting values in reset().
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Return the prober to its initial, nothing-seen-yet state."""
        super(SingleByteCharSetProber, self).reset()
        # Order of the previous character; 255 is outside the sampling range.
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # Number of characters whose order fell inside the sampling range.
        self._freq_char = 0

    @property
    def charset_name(self):
        """Charset name from the auxiliary prober if any, else the model."""
        helper = self._name_prober
        return helper.charset_name if helper else self._model['charset_name']

    @property
    def language(self):
        """Language from the auxiliary prober if any, else the model."""
        helper = self._name_prober
        return helper.language if helper else self._model.get('language')

    def feed(self, byte_str):
        """Update the statistics with *byte_str* and return the new state."""
        model = self._model
        if not model['keep_english_letter']:
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state

        order_of = model['char_to_order_map']
        sample_size = self.SAMPLE_SIZE
        for byte in byte_str:
            # XXX: Order is in range 1-64, so one would think we want 0-63
            #      here, but that leads to 27 more test failures than before.
            order = order_of[byte]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            #      CharacterCategory.SYMBOL is actually 253, so CONTROL is
            #      used to stay closer to the original intent. The only
            #      difference is whether digits and control characters count
            #      towards _total_char.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < sample_size:
                self._freq_char += 1
                previous = self._last_order
                if previous < sample_size:
                    self._total_seqs += 1
                    if self._reversed:
                        # Look the pair up with the letters swapped.
                        row, column = order, previous
                    else:
                        row, column = previous, order
                    likelihood = model['precedence_matrix'][
                        (row * sample_size) + column]
                    self._seq_counters[likelihood] += 1
            self._last_order = order

        charset_name = model['charset_name']
        if (self.state == ProbingState.DETECTING
                and self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD):
            confidence = self.get_confidence()
            if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                self.logger.debug('%s confidence = %s, we have a winner',
                                  charset_name, confidence)
                self._state = ProbingState.FOUND_IT
            elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                self.logger.debug('%s confidence = %s, below negative '
                                  'shortcut threshhold %s', charset_name,
                                  confidence,
                                  self.NEGATIVE_SHORTCUT_THRESHOLD)
                self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Return the confidence: 0.01 with no sequences, never above 0.99."""
        if not self._total_seqs > 0:
            return 0.01
        positive = self._seq_counters[SequenceLikelihood.POSITIVE]
        ratio = ((1.0 * positive) /
                 self._total_seqs / self._model['typical_positive_ratio'])
        ratio = ratio * self._freq_char / self._total_char
        return 0.99 if ratio >= 1.0 else ratio
|
|
@ -0,0 +1,73 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
|
||||
Latin5CyrillicModel, MacCyrillicModel,
|
||||
Ibm866Model, Ibm855Model)
|
||||
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
|
||||
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
|
||||
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
|
||||
from .langthaimodel import TIS620ThaiModel
|
||||
from .langhebrewmodel import Win1255HebrewModel
|
||||
from .hebrewprober import HebrewProber
|
||||
from .langturkishmodel import Latin5TurkishModel
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
    """Group prober holding one single-byte prober per supported model."""

    def __init__(self):
        """Build the prober list (plain models first, Hebrew trio last)."""
        super(SBCSGroupProber, self).__init__()

        # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
        #       after we retrain model: Latin2HungarianModel and
        #       Win1250HungarianModel used to sit after the Bulgarian models.
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(model)
                        for model in plain_models]

        # Hebrew uses two model probers (second argument False / True is the
        # `reversed` flag) that share one HebrewProber as their name prober.
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        self.probers.extend([hebrew, logical, visual])

        self.reset()
|
|
@ -0,0 +1,92 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJIS_SM_MODEL
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
    """Multi-byte prober combining the SJIS state machine with two analysers."""

    def __init__(self):
        """Create the state machine and both analysers, then reset."""
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyser."""
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Charset name as reported by the context analyser."""
        return self.context_analyzer.charset_name

    @property
    def language(self):
        """Always ``"Japanese"``."""
        return "Japanese"

    def feed(self, byte_str):
        """Run *byte_str* through the state machine and return the state."""
        machine = self.coding_sm
        for index, byte in enumerate(byte_str):
            coding_state = machine.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, index)
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state != MachineState.START:
                continue

            # A complete character just ended at this byte.
            char_len = machine.get_current_charlen()
            if index == 0:
                # The character may have started in the previous chunk, so
                # combine the remembered byte with the first byte of this one.
                self._last_char[1] = byte_str[0]
                self.context_analyzer.feed(self._last_char[2 - char_len:],
                                           char_len)
                self.distribution_analyzer.feed(self._last_char, char_len)
            else:
                context_start = index + 1 - char_len
                self.context_analyzer.feed(
                    byte_str[context_start:index + 3 - char_len], char_len)
                self.distribution_analyzer.feed(
                    byte_str[index - 1:index + 1], char_len)

        # Remember the final byte for the next call.
        self._last_char[0] = byte_str[-1]

        if (self.state == ProbingState.DETECTING
                and self.context_analyzer.got_enough_data()
                and self.get_confidence() > self.SHORTCUT_THRESHOLD):
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        from_context = self.context_analyzer.get_confidence()
        from_distribution = self.distribution_analyzer.get_confidence()
        return max(from_context, from_distribution)
|
|
@ -0,0 +1,286 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
"""
|
||||
Module containing the UniversalDetector detector class, which is the primary
|
||||
class a user of ``chardet`` should use.
|
||||
|
||||
:author: Mark Pilgrim (initial port to Python)
|
||||
:author: Shy Shalom (original C code)
|
||||
:author: Dan Blanchard (major refactoring for 3.0)
|
||||
:author: Ian Cordasco
|
||||
"""
|
||||
|
||||
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
|
||||
|
||||
class UniversalDetector(object):
    """
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    """

    # A prober must beat this confidence in close() to be reported.
    MINIMUM_THRESHOLD = 0.20
    # Any byte with the high bit set.
    HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
    # An ESC byte or the two-byte sequence "~{".
    ESC_DETECTOR = re.compile(b'(\033|~{)')
    # Bytes 0x80-0x9F; seeing one switches ISO-8859-x names to Windows names.
    WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
    ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
                   'iso-8859-2': 'Windows-1250',
                   'iso-8859-5': 'Windows-1251',
                   'iso-8859-6': 'Windows-1256',
                   'iso-8859-7': 'Windows-1253',
                   'iso-8859-8': 'Windows-1255',
                   'iso-8859-9': 'Windows-1254',
                   'iso-8859-13': 'Windows-1257'}

    def __init__(self, lang_filter=LanguageFilter.ALL):
        """
        Create a detector.

        :param lang_filter: ``LanguageFilter`` value passed on to the escape
                            and multi-byte probers; its ``NON_CJK`` bit
                            decides whether single-byte probers are used.
        """
        self._esc_charset_prober = None
        self._charset_probers = []
        self.result = None
        self.done = None
        self._got_data = None
        self._input_state = None
        self._last_char = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)
        self._has_win_bytes = None
        self.reset()

    def reset(self):
        """
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        """
        self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
        self.done = False
        self._got_data = False
        self._has_win_bytes = False
        self._input_state = InputState.PURE_ASCII
        self._last_char = b''
        if self._esc_charset_prober:
            self._esc_charset_prober.reset()
        for prober in self._charset_probers:
            prober.reset()

    def feed(self, byte_str):
        """
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        """
        if self.done:
            return

        if not len(byte_str):
            return

        if not isinstance(byte_str, bytearray):
            byte_str = bytearray(byte_str)

        # First check for known BOMs, since these are guaranteed to be correct
        if not self._got_data:
            # If the data starts with BOM, we know it is UTF
            if byte_str.startswith(codecs.BOM_UTF8):
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_UTF32_LE,
                                      codecs.BOM_UTF32_BE)):
                # FF FE 00 00  UTF-32, little-endian BOM
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
                               'confidence': 1.0,
                               'language': ''}
            elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
                # FF FE  UTF-16, little endian BOM
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16",
                               'confidence': 1.0,
                               'language': ''}

            self._got_data = True
            if self.result['encoding'] is not None:
                self.done = True
                return

        # If none of those matched and we've only seen ASCII so far, check
        # for high bytes and escape sequences
        if self._input_state == InputState.PURE_ASCII:
            if self.HIGH_BYTE_DETECTOR.search(byte_str):
                self._input_state = InputState.HIGH_BYTE
            # The last byte of the previous chunk is prepended so a "~{"
            # split across two feed() calls is still found.
            elif self.ESC_DETECTOR.search(self._last_char + byte_str):
                self._input_state = InputState.ESC_ASCII

        self._last_char = byte_str[-1:]

        # If we've seen escape sequences, use the EscCharSetProber, which
        # uses a simple state machine to check for known escape sequences in
        # HZ and ISO-2022 encodings, since those are the only encodings that
        # use such sequences.
        if self._input_state == InputState.ESC_ASCII:
            if not self._esc_charset_prober:
                self._esc_charset_prober = EscCharSetProber(self.lang_filter)
            if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
                self.result = {'encoding':
                               self._esc_charset_prober.charset_name,
                               'confidence':
                               self._esc_charset_prober.get_confidence(),
                               'language':
                               self._esc_charset_prober.language}
                self.done = True
        # If we've seen high bytes (i.e., those with values greater than 127),
        # we need to do more complicated checks using all our multi-byte and
        # single-byte probers that are left.  The single-byte probers
        # use character bigram distributions to determine the encoding, whereas
        # the multi-byte probers use a combination of character unigram and
        # bigram distributions.
        elif self._input_state == InputState.HIGH_BYTE:
            if not self._charset_probers:
                self._charset_probers = [MBCSGroupProber(self.lang_filter)]
                # If we're checking non-CJK encodings, use single-byte prober
                if self.lang_filter & LanguageFilter.NON_CJK:
                    self._charset_probers.append(SBCSGroupProber())
                self._charset_probers.append(Latin1Prober())
            for prober in self._charset_probers:
                if prober.feed(byte_str) == ProbingState.FOUND_IT:
                    self.result = {'encoding': prober.charset_name,
                                   'confidence': prober.get_confidence(),
                                   'language': prober.language}
                    self.done = True
                    break
            if self.WIN_BYTE_DETECTOR.search(byte_str):
                self._has_win_bytes = True

    def close(self):
        """
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        """
        # Don't bother with checks if we're already done
        if self.done:
            return self.result
        self.done = True

        if not self._got_data:
            self.logger.debug('no data received!')

        # Default to ASCII if it is all we've seen so far
        elif self._input_state == InputState.PURE_ASCII:
            self.result = {'encoding': 'ascii',
                           'confidence': 1.0,
                           'language': ''}

        # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
        elif self._input_state == InputState.HIGH_BYTE:
            prober_confidence = None
            max_prober_confidence = 0.0
            max_prober = None
            for prober in self._charset_probers:
                if not prober:
                    continue
                prober_confidence = prober.get_confidence()
                if prober_confidence > max_prober_confidence:
                    max_prober_confidence = prober_confidence
                    max_prober = prober
            if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
                charset_name = max_prober.charset_name
                lower_charset_name = max_prober.charset_name.lower()
                confidence = max_prober.get_confidence()
                # Use Windows encoding name instead of ISO-8859 if we saw any
                # extra Windows-specific bytes
                if lower_charset_name.startswith('iso-8859'):
                    if self._has_win_bytes:
                        charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
                                                            charset_name)
                self.result = {'encoding': charset_name,
                               'confidence': confidence,
                               'language': max_prober.language}

        # Log all prober confidences if none met MINIMUM_THRESHOLD
        if self.logger.getEffectiveLevel() == logging.DEBUG:
            if self.result['encoding'] is None:
                self.logger.debug('no probers hit minimum threshold')
                for group_prober in self._charset_probers:
                    if not group_prober:
                        continue
                    if isinstance(group_prober, CharSetGroupProber):
                        for prober in group_prober.probers:
                            self.logger.debug('%s %s confidence = %s',
                                              prober.charset_name,
                                              prober.language,
                                              prober.get_confidence())
                    else:
                        # Not a group: report this prober itself, not the
                        # variable left over from an earlier loop.
                        self.logger.debug('%s %s confidence = %s',
                                          group_prober.charset_name,
                                          group_prober.language,
                                          group_prober.get_confidence())
        return self.result
|
|
@ -0,0 +1,82 @@
|
|||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8_SM_MODEL
|
||||
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
    """Prober that validates input against the UTF-8 state machine."""

    ONE_CHAR_PROB = 0.5

    def __init__(self):
        """Create the UTF-8 coding state machine and reset the counter."""
        super(UTF8Prober, self).__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        self._num_mb_chars = None
        self.reset()

    def reset(self):
        """Reset base state, the state machine and the multi-byte count."""
        super(UTF8Prober, self).reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self):
        """Always ``"utf-8"``."""
        return "utf-8"

    @property
    def language(self):
        """Always the empty string."""
        return ""

    def feed(self, byte_str):
        """Run *byte_str* through the state machine and return the state."""
        machine = self.coding_sm
        for byte in byte_str:
            outcome = machine.next_state(byte)
            if outcome == MachineState.ERROR:
                self._state = ProbingState.NOT_ME
                break
            if outcome == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            # Back at START means a character just completed; count it when
            # it took two or more bytes.
            if (outcome == MachineState.START
                    and machine.get_current_charlen() >= 2):
                self._num_mb_chars += 1

        if (self.state == ProbingState.DETECTING
                and self.get_confidence() > self.SHORTCUT_THRESHOLD):
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return a confidence that grows with the multi-byte count.

        With fewer than six multi-byte characters the result is
        ``1.0 - 0.99 * ONE_CHAR_PROB ** count``; otherwise it is 0.99.
        """
        ceiling = 0.99
        if self._num_mb_chars >= 6:
            return ceiling
        return 1.0 - ceiling * self.ONE_CHAR_PROB ** self._num_mb_chars
|
|
@ -0,0 +1,9 @@
|
|||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
# Version string of the bundled chardet release.
__version__ = "3.0.4"
# The same version as a list of its dot-separated components (strings).
VERSION = __version__.split('.')
|
|
@ -0,0 +1,2 @@
|
|||
from .package_data import __version__
|
||||
from .core import *
|
|
@ -0,0 +1,118 @@
|
|||
from .core import encode, decode, alabel, ulabel, IDNAError
|
||||
import codecs
|
||||
import re
|
||||
|
||||
# Matches any one of U+002E, U+3002, U+FF0E or U+FF61; the incremental
# encoder/decoder in this module split input into labels on it.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
|
||||
|
||||
class Codec(codecs.Codec):
    """Stateless codec delegating to the package's ``encode``/``decode``."""

    def encode(self, data, errors='strict'):
        """Encode *data* and return ``(encoded, consumed_length)``.

        Only ``errors='strict'`` is supported; anything else raises
        ``IDNAError``. Empty input yields ``(b"", 0)`` so that the result is
        a byte string in every case, like the non-empty path.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            # Bytes, not text: an encoder's output must stay one type.
            return b"", 0

        return encode(data), len(data)

    def decode(self, data, errors='strict'):
        """Decode *data* and return ``(decoded, consumed_length)``.

        Only ``errors='strict'`` is supported; anything else raises
        ``IDNAError``. Empty input yields ``(u"", 0)``.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return u"", 0

        return decode(data), len(data)
|
||||
|
||||
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental encoder that converts complete labels as they arrive."""

    def _buffer_encode(self, data, errors, final):
        """Encode the complete labels in *data*.

        :param data: text to encode, possibly ending in a partial label.
        :param errors: must be ``'strict'``, otherwise ``IDNAError`` is raised.
        :param final: when false, a trailing label with no dot after it is
            left unconsumed for the next call.
        :returns: ``(encoded_bytes, consumed)`` where *consumed* counts the
            input characters that were encoded, separators included.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (b"", 0)

        labels = _unicode_dots_re.split(data)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = []
        size = 0
        for label in labels:
            result.append(alabel(label))
            if size:
                size += 1
            size += len(label)

        # Join with U+002E. alabel() yields byte strings, so the separator
        # and trailing dot must be bytes too (a text "." fails on Python 3).
        result = b".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
|
||||
|
||||
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental decoder that converts complete labels as they arrive."""

    def _buffer_decode(self, data, errors, final):
        """Decode the complete labels in *data*.

        :param data: byte string (must be ASCII) or text to decode, possibly
            ending in a partial label.
        :param errors: must be ``'strict'``, otherwise ``IDNAError`` is raised.
        :param final: when false, a trailing label with no dot after it is
            left unconsumed for the next call.
        :returns: ``(decoded_text, consumed)`` where *consumed* counts the
            input units that were decoded, separators included.
        :raises UnicodeDecodeError: if byte input contains non-ASCII bytes.
        """
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        # type(u'') is the text type on both Python 2 and 3; this module has
        # no `unicode` name in scope, so it must not be referenced.
        if isinstance(data, type(u'')):
            labels = _unicode_dots_re.split(data)
        else:
            # Must be ASCII string; decoding both validates and converts it.
            data = bytes(data).decode("ascii")
            labels = data.split(u".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = []
        size = 0
        for label in labels:
            result.append(ulabel(label))
            if size:
                size += 1
            size += len(label)

        result = u".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
|
||||
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that takes its encode/decode methods from ``Codec``."""
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that takes its encode/decode methods from ``Codec``."""
|
||||
|
||||
def getregentry():
    """Return the ``codecs.CodecInfo`` describing this module's 'idna' codec."""
    # Two separate Codec instances, one per direction.
    stateless_encode = Codec().encode
    stateless_decode = Codec().decode
    entry = codecs.CodecInfo(
        name='idna',
        encode=stateless_encode,
        decode=stateless_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return entry
|
|
@ -0,0 +1,12 @@
|
|||
from .core import *
|
||||
from .codec import *
|
||||
|
||||
def ToASCII(label):
    """Return ``encode(label)``; a compatibility alias for ``encode``."""
    encoded = encode(label)
    return encoded
|
||||
|
||||
def ToUnicode(label):
    """Return ``decode(label)``; a compatibility alias for ``decode``."""
    decoded = decode(label)
    return decoded
|
||||
|
||||
def nameprep(s):
    """Always raise ``NotImplementedError``; the argument is ignored."""
    reason = "IDNA 2008 does not utilise nameprep protocol"
    raise NotImplementedError(reason)
|
||||
|
|
@ -0,0 +1,396 @@
|
|||
from . import idnadata
|
||||
import bisect
|
||||
import unicodedata
|
||||
import re
|
||||
import sys
|
||||
from .intranges import intranges_contain
|
||||
|
||||
# NOTE(review): presumably compared against _combining_class() results to
# recognise a virama -- the code that uses it is outside this chunk.
_virama_combining_class = 9
# Byte-string prefix associated with A-labels (usage not visible in this chunk).
_alabel_prefix = b'xn--'
# Matches any one of U+002E, U+3002, U+FF0E or U+FF61.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

if sys.version_info[0] == 3:
    # Python 3 has no `unicode` / `unichr` builtins; alias them so the rest
    # of the module can use the Python 2 names on either version.
    unicode = str
    unichr = chr
|
||||
|
||||
class IDNAError(UnicodeError):
    """Root of the IDNA exception hierarchy; raised for any IDNA problem."""
|
||||
|
||||
|
||||
class IDNABidiError(IDNAError):
    """Raised when a label violates the bidirectional-text requirements."""
|
||||
|
||||
|
||||
class InvalidCodepoint(IDNAError):
    """Raised when a disallowed or unallocated codepoint is encountered."""
|
||||
|
||||
|
||||
class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not valid in the context where it occurs."""
|
||||
|
||||
|
||||
def _combining_class(cp):
    """Return the canonical combining class of integer codepoint *cp*.

    :raises ValueError: when the class is 0 and the character has no name
        in ``unicodedata``. ``unicodedata.name`` called without a default
        raises ``ValueError`` itself for unnamed characters.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    # A class of 0 is ambiguous, so make sure the character is known at all.
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
|
||||
|
||||
def _is_script(cp, script):
    """Return whether character *cp* lies in the ranges of ``idnadata.scripts[script]``."""
    codepoint = ord(cp)
    script_ranges = idnadata.scripts[script]
    return intranges_contain(codepoint, script_ranges)
|
||||
|
||||
def _punycode(s):
|
||||
return s.encode('punycode')
|
||||
|
||||
def _unot(s):
|
||||
return 'U+{0:04X}'.format(s)
|
||||
|
||||
|
||||
def valid_label_length(label):
    """Return ``True`` when *label* is at most 63 items long."""
    return not len(label) > 63
|
||||
|
||||
|
||||
def valid_string_length(label, trailing_dot):
    """Return ``True`` when *label* fits the overall length limit.

    The limit is 254 when *trailing_dot* is truthy and 253 otherwise.
    """
    limit = 254 if trailing_dot else 253
    return not len(label) > limit
|
||||
|
||||
|
||||
def check_bidi(label, check_ltr=False):
    """Validate ``label`` against the Bidi rules numbered 1-6 below.

    :param label: text label to check.
    :param check_ltr: when true, apply the rules even if the label contains
        no right-to-left characters.
    :returns: ``True`` when the label passes (or the rules do not apply).
    :raises IDNABidiError: when any rule is violated or a character has no
        known directionality.
    """
    # Resolve every character's bidi class once; both passes below use it.
    directions = []
    for position, char in enumerate(label, 1):
        direction = unicodedata.bidirectional(char)
        if direction == '':
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), position))
        directions.append(direction)

    # Bidi rules should only be applied if string contains RTL characters
    has_rtl = any(direction in ('R', 'AL', 'AN') for direction in directions)
    if not (has_rtl or check_ltr):
        return True

    # Bidi rule 1
    first_direction = directions[0]
    if first_direction in ('R', 'AL'):
        rtl = True
    elif first_direction == 'L':
        rtl = False
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    rtl_allowed = ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')
    ltr_allowed = ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')

    valid_ending = False
    number_type = False
    for position, direction in enumerate(directions, 1):
        if rtl:
            # Bidi rule 2
            if direction not in rtl_allowed:
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(position))
            # Bidi rule 3
            if direction in ('R', 'AL', 'EN', 'AN'):
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False
            # Bidi rule 4
            if direction in ('AN', 'EN'):
                if not number_type:
                    number_type = direction
                elif number_type != direction:
                    raise IDNABidiError('Can not mix numeral types in a right-to-left label')
        else:
            # Bidi rule 5
            if direction not in ltr_allowed:
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(position))
            # Bidi rule 6
            if direction in ('L', 'EN'):
                valid_ending = True
            elif direction != 'NSM':
                valid_ending = False

    if not valid_ending:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
|
||||
|
||||
|
||||
def check_initial_combiner(label):
    """Reject labels whose first character is a combining mark.

    :returns: ``True`` when the first character's Unicode category does not
        start with ``M``.
    :raises IDNAError: when it does.
    """
    first_category = unicodedata.category(label[0])
    if first_category[0] == 'M':
        raise IDNAError('Label begins with an illegal combining character')
    return True
|
||||
|
||||
|
||||
def check_hyphen_ok(label):
    """Enforce the hyphen placement restrictions on ``label``.

    :returns: ``True`` when the label passes.
    :raises IDNAError: if positions 3 and 4 are both hyphens, or the label
        starts or ends with a hyphen.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if '-' in (label[0], label[-1]):
        raise IDNAError('Label must not start or end with a hyphen')
    return True
|
||||
|
||||
|
||||
def check_nfc(label):
    """Require ``label`` to already be in Unicode Normalization Form C.

    :raises IDNAError: if NFC-normalising the label changes it.
    """
    normalized = unicodedata.normalize('NFC', label)
    if normalized != label:
        raise IDNAError('Label must be in Normalization Form C')
|
||||
|
||||
|
||||
def valid_contextj(label, pos):
    """Apply the CONTEXTJ rules to the joiner at ``label[pos]``.

    :param label: text label being validated.
    :param pos: zero-based index of the character to check.
    :returns: ``True`` if the joiner is permitted at this position, else
        ``False``. Characters other than U+200C / U+200D always yield
        ``False``.
    :raises ValueError: propagated from ``_combining_class`` when the
        preceding character is unknown to ``unicodedata``.
    """
    cp_value = ord(label[pos])

    if cp_value == 0x200c:
        # ZERO WIDTH NON-JOINER: allowed directly after a virama ...
        if pos > 0:
            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                return True

        # ... or between a left/dual-joining character and a right/dual-joining
        # character, with only transparent (T) characters in between
        # (RFC 5892, Appendix A.1). Each scan therefore stops at the first
        # non-transparent character instead of skipping over it.
        ok = False
        for i in range(pos - 1, -1, -1):
            joining_type = idnadata.joining_types.get(ord(label[i]))
            if joining_type == ord('T'):
                continue
            if joining_type in [ord('L'), ord('D')]:
                ok = True
            break

        if not ok:
            return False

        ok = False
        for i in range(pos + 1, len(label)):
            joining_type = idnadata.joining_types.get(ord(label[i]))
            if joining_type == ord('T'):
                continue
            if joining_type in [ord('R'), ord('D')]:
                ok = True
            break
        return ok

    if cp_value == 0x200d:
        # ZERO WIDTH JOINER: only allowed directly after a virama.
        if pos > 0:
            if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
                return True
        return False

    return False
|
||||
|
||||
|
||||
def valid_contexto(label, pos, exception=False):
    """Apply the CONTEXTO rules to the character at ``label[pos]``.

    :param label: text label being validated.
    :param pos: zero-based index of the character to check.
    :param exception: unused; kept for interface compatibility.
    :returns: ``True`` if the character is permitted in this context and
        ``False`` otherwise, including for code points that have no rule here
        (previously the function fell off the end and returned ``None``).
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00b7:
        # U+00B7 must sit between two U+006C characters.
        if 0 < pos < len(label) - 1:
            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
                return True
        return False

    elif cp_value == 0x0375:
        # U+0375 must be followed by a Greek character.
        if pos < len(label) - 1 and len(label) > 1:
            return _is_script(label[pos + 1], 'Greek')
        return False

    elif cp_value == 0x05f3 or cp_value == 0x05f4:
        # U+05F3 / U+05F4 must be preceded by a Hebrew character.
        if pos > 0:
            return _is_script(label[pos - 1], 'Hebrew')
        return False

    elif cp_value == 0x30fb:
        # U+30FB needs at least one Hiragana, Katakana or Han character
        # somewhere in the label.
        for cp in label:
            if cp == u'\u30fb':
                continue
            if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
                return True
        return False

    elif 0x660 <= cp_value <= 0x669:
        # U+0660..U+0669 digits may not be mixed with U+06F0..U+06F9 digits.
        return not any(0x6f0 <= ord(cp) <= 0x06f9 for cp in label)

    elif 0x6f0 <= cp_value <= 0x6f9:
        # ... and vice versa.
        return not any(0x660 <= ord(cp) <= 0x0669 for cp in label)

    # No contextual rule matches this code point: not valid in context.
    return False
|
||||
|
||||
|
||||
def check_label(label):
    """Validate a single U-label, raising on the first problem found.

    Bytes input is decoded as UTF-8 first. The label is then checked for NFC
    form, hyphen placement and an initial combining mark. Each code point
    must be PVALID, or CONTEXTJ/CONTEXTO with a satisfied contextual rule.
    Finally the Bidi rules are applied.

    :param label: ``str`` or ``bytes``/``bytearray`` label.
    :raises IDNAError: for an empty label, or when a joiner is adjacent to a
        codepoint unknown to ``unicodedata``.
    :raises InvalidCodepointContext: when a CONTEXTJ/CONTEXTO codepoint is
        used where its rule does not allow it.
    :raises InvalidCodepoint: when a codepoint is in none of the allowed
        classes.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode('utf-8')
    if len(label) == 0:
        raise IDNAError('Empty Label')

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    for (pos, cp) in enumerate(label):
        cp_value = ord(cp)
        if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
            continue
        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
            # Only the rule evaluation is guarded. InvalidCodepointContext is
            # itself a ValueError (via UnicodeError), so raising it inside the
            # try would be caught below and replaced by the wrong error.
            try:
                joiner_ok = valid_contextj(label, pos)
            except ValueError:
                raise IDNAError('Unknown codepoint adjacent to joiner {0} at position {1} in {2}'.format(
                    _unot(cp_value), pos+1, repr(label)))
            if not joiner_ok:
                raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(
                    _unot(cp_value), pos+1, repr(label)))
        elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
            if not valid_contexto(label, pos):
                raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
        else:
            raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))

    check_bidi(label)
|
||||
|
||||
|
||||
def alabel(label):
    """Convert a single label to its A-label (ASCII bytes) form.

    A label that encodes cleanly to ASCII is validated through ``ulabel`` and
    returned as bytes. Otherwise it is validated with ``check_label``,
    punycode-encoded and given the ``xn--`` prefix.

    :raises IDNAError: for empty input or a result longer than 63 bytes, plus
        anything raised by the validation helpers.
    """
    try:
        label = label.encode('ascii')
        ulabel(label)
        if not valid_label_length(label):
            raise IDNAError('Label too long')
        return label
    except UnicodeEncodeError:
        # Not pure ASCII: fall through to the punycode path below.
        pass

    if not label:
        raise IDNAError('No Input')

    label = unicode(label)
    check_label(label)
    encoded = _alabel_prefix + _punycode(label)

    if not valid_label_length(encoded):
        raise IDNAError('Label too long')

    return encoded
|
||||
|
||||
|
||||
def ulabel(label):
    """Convert a single label to its U-label (text) form.

    Non-ASCII text is validated and returned unchanged. ASCII input is
    lower-cased; without the ``xn--`` prefix it is validated and returned as
    text, otherwise the remainder is punycode-decoded and validated.

    :raises IDNAError: (or a subclass) when ``check_label`` rejects the label.
    """
    if not isinstance(label, (bytes, bytearray)):
        try:
            label = label.encode('ascii')
        except UnicodeEncodeError:
            # Already a Unicode label: just validate it.
            check_label(label)
            return label

    label = label.lower()
    if not label.startswith(_alabel_prefix):
        check_label(label)
        return label.decode('ascii')

    decoded = label[len(_alabel_prefix):].decode('punycode')
    check_label(decoded)
    return decoded
|
||||
|
||||
|
||||
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters of ``domain`` according to UTS46 processing.

    :param domain: text to remap.
    :param std3_rules: controls how rows with status ``"3"`` are treated.
    :param transitional: controls how rows with status ``"D"`` are treated.
    :returns: the remapped text, NFC-normalised.
    :raises InvalidCodepoint: when a character is neither kept, replaced nor
        ignored by the table.
    """
    from .uts46data import uts46data
    pieces = []
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # The first 256 rows are indexed directly; later rows are found
            # by bisecting on the code point.
            if code_point < 256:
                row_index = code_point
            else:
                row_index = bisect.bisect_left(uts46data, (code_point, "Z")) - 1
            uts46row = uts46data[row_index]
            status = uts46row[1]
            replacement = uts46row[2] if len(uts46row) == 3 else None

            if (status == "V"
                    or (status == "D" and not transitional)
                    or (status == "3" and not std3_rules and replacement is None)):
                pieces.append(char)
            elif replacement is not None and (
                    status == "M"
                    or (status == "3" and not std3_rules)
                    or (status == "D" and transitional)):
                pieces.append(replacement)
            elif status != "I":
                # Handled by the except clause below, like a failed lookup.
                raise IndexError()
        return unicodedata.normalize("NFC", u"".join(pieces))
    except IndexError:
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
                _unot(code_point), pos + 1, repr(domain)))
|
||||
|
||||
|
||||
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
    """Encode a domain name to its ASCII (A-label) byte form.

    :param s: domain as text, or ASCII ``bytes``/``bytearray``.
    :param strict: split labels on ``.`` only, not on the other dot forms.
    :param uts46: run ``uts46_remap`` on the input first.
    :param std3_rules: forwarded to ``uts46_remap``.
    :param transitional: forwarded to ``uts46_remap``.
    :returns: the labels joined by ``b'.'``; a trailing dot is preserved.
    :raises IDNAError: for an empty domain, an empty label, an over-long
        domain, or any label validation failure.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    labels = s.split('.') if strict else _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # An empty final label means the domain ended with a dot.
    trailing_dot = labels[-1] == ''
    if trailing_dot:
        del labels[-1]

    result = []
    for label in labels:
        encoded = alabel(label)
        if not encoded:
            raise IDNAError('Empty label')
        result.append(encoded)
    if trailing_dot:
        result.append(b'')

    domain = b'.'.join(result)
    if not valid_string_length(domain, trailing_dot):
        raise IDNAError('Domain too long')
    return domain
|
||||
|
||||
|
||||
def decode(s, strict=False, uts46=False, std3_rules=False):
    """Decode a domain name to its Unicode (U-label) text form.

    :param s: domain as text, or ASCII ``bytes``/``bytearray``.
    :param strict: split labels on ``.`` only, not on the other dot forms.
    :param uts46: run ``uts46_remap`` (non-transitional) on the input first.
    :param std3_rules: forwarded to ``uts46_remap``.
    :returns: the labels joined by ``.``; a trailing dot is preserved.
    :raises IDNAError: for an empty domain, an empty label, or any label
        validation failure.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    labels = s.split(u'.') if strict else _unicode_dots_re.split(s)
    if not labels or labels == ['']:
        raise IDNAError('Empty domain')

    # An empty final label means the domain ended with a dot.
    trailing_dot = not labels[-1]
    if trailing_dot:
        del labels[-1]

    result = []
    for label in labels:
        decoded = ulabel(label)
        if not decoded:
            raise IDNAError('Empty label')
        result.append(decoded)
    if trailing_dot:
        result.append(u'')
    return u'.'.join(result)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,53 @@
|
|||
"""
|
||||
Given a list of integers, made up of (hopefully) a small number of long runs
|
||||
of consecutive integers, compute a representation of the form
|
||||
((start1, end1), (start2, end2) ...). Then answer the question "was x present
|
||||
in the original list?" in time O(log(# runs)).
|
||||
"""
|
||||
|
||||
import bisect
|
||||
|
||||
def intranges_from_list(list_):
    """Collapse a list of integers into a tuple of encoded ranges.

    The input is sorted and each run of consecutive integers becomes one
    half-open range ``start <= x < end``. Every range is packed into a single
    integer by ``_encode_range`` rather than kept as a tuple.
    """
    sorted_list = sorted(list_)
    final_index = len(sorted_list) - 1
    ranges = []
    run_start = 0
    for i, value in enumerate(sorted_list):
        # Still inside a run while the next value is exactly one larger.
        if i < final_index and value == sorted_list[i + 1] - 1:
            continue
        ranges.append(_encode_range(sorted_list[run_start], value + 1))
        run_start = i + 1

    return tuple(ranges)
|
||||
|
||||
def _encode_range(start, end):
    """Pack ``start`` and ``end`` into one integer: ``start`` in the bits above 32, ``end`` below."""
    high_bits = start << 32
    return high_bits | end
|
||||
|
||||
def _decode_range(r):
    """Unpack an integer from ``_encode_range`` back into ``(start, end)``."""
    low_mask = (1 << 32) - 1
    start = r >> 32
    end = r & low_mask
    return start, end
|
||||
|
||||
|
||||
def intranges_contain(int_, ranges):
    """Determine if `int_` falls into one of the ranges in `ranges`.

    :param int_: integer to look up.
    :param ranges: sorted sequence of ranges packed by ``_encode_range``.
    """
    probe = _encode_range(int_, 0)
    pos = bisect.bisect_left(ranges, probe)
    # The range just before the insertion point may cover int_:
    # start <= int_ < end.
    if pos > 0:
        start, end = _decode_range(ranges[pos - 1])
        if start <= int_ < end:
            return True
    # Otherwise the range at the insertion point may begin exactly at int_.
    if pos < len(ranges):
        start, _ = _decode_range(ranges[pos])
        if start == int_:
            return True
    return False
|
|
@ -0,0 +1,2 @@
|
|||
__version__ = '2.8'
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,131 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# __
|
||||
# /__) _ _ _ _ _/ _
|
||||
# / ( (- (/ (/ (- _) / _)
|
||||
# /
|
||||
|
||||
"""
|
||||
Requests HTTP Library
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Requests is an HTTP library, written in Python, for human beings.
|
||||
Basic GET usage:
|
||||
|
||||
>>> import requests
|
||||
>>> r = requests.get('https://www.python.org')
|
||||
>>> r.status_code
|
||||
200
|
||||
>>> b'Python is a programming language' in r.content
|
||||
True
|
||||
|
||||
... or POST:
|
||||
|
||||
>>> payload = dict(key1='value1', key2='value2')
|
||||
>>> r = requests.post('https://httpbin.org/post', data=payload)
|
||||
>>> print(r.text)
|
||||
{
|
||||
...
|
||||
"form": {
|
||||
"key1": "value1",
|
||||
"key2": "value2"
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
The other HTTP methods are supported - see `requests.api`. Full documentation
|
||||
is at <http://python-requests.org>.
|
||||
|
||||
:copyright: (c) 2017 by Kenneth Reitz.
|
||||
:license: Apache 2.0, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
import urllib3
|
||||
import chardet
|
||||
import warnings
|
||||
from .exceptions import RequestsDependencyWarning
|
||||
|
||||
|
||||
def check_compatibility(urllib3_version, chardet_version):
    """Check that the imported urllib3 and chardet versions are supported.

    Failures are raised explicitly rather than with ``assert`` statements, so
    the check is not stripped when Python runs with ``-O``.

    :param urllib3_version: dotted version string of urllib3.
    :param chardet_version: dotted version string of chardet.
    :raises AssertionError: when a version is outside the supported range.
    :raises ValueError: when a version string cannot be parsed.
    """
    urllib3_version = urllib3_version.split('.')
    # Verify urllib3 isn't installed from git.
    if urllib3_version == ['dev']:
        raise AssertionError('urllib3 is installed from git (version "dev")')

    # Sometimes, urllib3 only reports its version as 16.1.
    if len(urllib3_version) == 2:
        urllib3_version.append('0')

    # Check urllib3 for compatibility.
    major, minor, patch = urllib3_version  # noqa: F811
    major, minor, patch = int(major), int(minor), int(patch)
    # Accept major 1 with minor 21..25; the patch level is not checked.
    if not (major == 1 and 21 <= minor <= 25):
        raise AssertionError('unsupported urllib3 version')

    # Check chardet for compatibility.
    major, minor, patch = chardet_version.split('.')[:3]
    major, minor, patch = int(major), int(minor), int(patch)
    # chardet >= 3.0.2, < 3.1.0
    if not (major == 3 and minor < 1 and patch >= 2):
        raise AssertionError('unsupported chardet version')
|
||||
|
||||
|
||||
def _check_cryptography(cryptography_version):
    """Warn when the installed ``cryptography`` is older than 1.3.4.

    :param cryptography_version: dotted version string. If any component is
        not an integer the check is skipped silently.
    """
    try:
        parsed_version = [int(part) for part in cryptography_version.split('.')]
    except ValueError:
        return

    if parsed_version < [1, 3, 4]:
        warning = 'Old version of cryptography ({}) may cause slowdown.'.format(parsed_version)
        warnings.warn(warning, RequestsDependencyWarning)
|
||||
|
||||
# Check imported dependencies for compatibility.
|
||||
try:
|
||||
check_compatibility(urllib3.__version__, chardet.__version__)
|
||||
except (AssertionError, ValueError):
|
||||
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
||||
"version!".format(urllib3.__version__, chardet.__version__),
|
||||
RequestsDependencyWarning)
|
||||
|
||||
# Attempt to enable urllib3's SNI support, if possible
|
||||
try:
|
||||
from urllib3.contrib import pyopenssl
|
||||
pyopenssl.inject_into_urllib3()
|
||||
|
||||
# Check cryptography version
|
||||
from cryptography import __version__ as cryptography_version
|
||||
_check_cryptography(cryptography_version)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# urllib3's DependencyWarnings should be silenced.
|
||||
from urllib3.exceptions import DependencyWarning
|
||||
warnings.simplefilter('ignore', DependencyWarning)
|
||||
|
||||
from .__version__ import __title__, __description__, __url__, __version__
|
||||
from .__version__ import __build__, __author__, __author_email__, __license__
|
||||
from .__version__ import __copyright__, __cake__
|
||||
|
||||
from . import utils
|
||||
from . import packages
|
||||
from .models import Request, Response, PreparedRequest
|
||||
from .api import request, get, head, post, patch, put, delete, options
|
||||
from .sessions import session, Session
|
||||
from .status_codes import codes
|
||||
from .exceptions import (
|
||||
RequestException, Timeout, URLRequired,
|
||||
TooManyRedirects, HTTPError, ConnectionError,
|
||||
FileModeWarning, ConnectTimeout, ReadTimeout
|
||||
)
|
||||
|
||||
# Set default logging handler to avoid "No handler found" warnings.
|
||||
import logging
|
||||
from logging import NullHandler
|
||||
|
||||
logging.getLogger(__name__).addHandler(NullHandler())
|
||||
|
||||
# FileModeWarnings go off per the default.
|
||||
warnings.simplefilter('default', FileModeWarning, append=True)
|
|
@ -0,0 +1,14 @@
|
|||
# .-. .-. .-. . . .-. .-. .-. .-.
|
||||
# |( |- |.| | | |- `-. | `-.
|
||||
# ' ' `-' `-`.`-' `-' `-' ' `-'
|
||||
|
||||
__title__ = 'requests'
|
||||
__description__ = 'Python HTTP for Humans.'
|
||||
__url__ = 'http://python-requests.org'
|
||||
__version__ = '2.22.0'
|
||||
__build__ = 0x022200
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__author_email__ = 'me@kennethreitz.org'
|
||||
__license__ = 'Apache 2.0'
|
||||
__copyright__ = 'Copyright 2019 Kenneth Reitz'
|
||||
__cake__ = u'\u2728 \U0001f370 \u2728'
|
|
@ -0,0 +1,42 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests._internal_utils
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
Provides utility functions that are consumed internally by Requests
|
||||
which depend on extremely few external helpers (such as compat)
|
||||
"""
|
||||
|
||||
from .compat import is_py2, builtin_str, str
|
||||
|
||||
|
||||
def to_native_string(string, encoding='ascii'):
    """Return ``string`` as the interpreter's native string type.

    A value that is already a ``builtin_str`` is returned unchanged.
    Otherwise it is encoded on Python 2 and decoded on Python 3, using
    ``encoding`` (ASCII unless told otherwise).
    """
    if isinstance(string, builtin_str):
        return string
    if is_py2:
        return string.encode(encoding)
    return string.decode(encoding)
|
||||
|
||||
|
||||
def unicode_is_ascii(u_string):
    """Tell whether a unicode string consists solely of ASCII characters.

    :param str u_string: unicode string to check. Must be unicode
        and not Python 2 `str`.
    :rtype: bool
    """
    assert isinstance(u_string, str)
    try:
        u_string.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True
|
|
@ -0,0 +1,488 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.adapters
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
This module contains the transport adapters that Requests uses to define
|
||||
and maintain connections.
|
||||
"""
|
||||
|
||||
import os.path
|
||||
import socket
|
||||
|
||||
from urllib3.poolmanager import PoolManager, proxy_from_url
|
||||
from urllib3.response import HTTPResponse
|
||||
from urllib3.util import parse_url
|
||||
from urllib3.util import Timeout as TimeoutSauce
|
||||
from urllib3.util.retry import Retry
|
||||
from urllib3.exceptions import ClosedPoolError
|
||||
from urllib3.exceptions import ConnectTimeoutError
|
||||
from urllib3.exceptions import HTTPError as _HTTPError
|
||||
from urllib3.exceptions import MaxRetryError
|
||||
from urllib3.exceptions import NewConnectionError
|
||||
from urllib3.exceptions import ProxyError as _ProxyError
|
||||
from urllib3.exceptions import ProtocolError
|
||||
from urllib3.exceptions import ReadTimeoutError
|
||||
from urllib3.exceptions import SSLError as _SSLError
|
||||
from urllib3.exceptions import ResponseError
|
||||
from urllib3.exceptions import LocationValueError
|
||||
|
||||
from .models import Response
|
||||
from .compat import urlparse, basestring
|
||||
from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths,
|
||||
get_encoding_from_headers, prepend_scheme_if_needed,
|
||||
get_auth_from_url, urldefragauth, select_proxy)
|
||||
from .structures import CaseInsensitiveDict
|
||||
from .cookies import extract_cookies_to_jar
|
||||
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
|
||||
ProxyError, RetryError, InvalidSchema, InvalidProxyURL,
|
||||
InvalidURL)
|
||||
from .auth import _basic_auth_str
|
||||
|
||||
# SOCKS support is optional: use urllib3's manager when it can be imported.
try:
    from urllib3.contrib.socks import SOCKSProxyManager
except ImportError:
    # Stand-in with the same name that accepts any arguments and fails only
    # when a SOCKS proxy manager is actually requested.
    def SOCKSProxyManager(*args, **kwargs):
        raise InvalidSchema("Missing dependencies for SOCKS support.")
|
||||
|
||||
DEFAULT_POOLBLOCK = False
|
||||
DEFAULT_POOLSIZE = 10
|
||||
DEFAULT_RETRIES = 0
|
||||
DEFAULT_POOL_TIMEOUT = None
|
||||
|
||||
|
||||
class BaseAdapter(object):
    """Abstract base for transport adapters.

    Subclasses must override :meth:`send` and :meth:`close`.
    """

    def __init__(self):
        super(BaseAdapter, self).__init__()

    def send(self, request, stream=False, timeout=None, verify=True,
             cert=None, proxies=None):
        """Send a PreparedRequest and return a Response.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param verify: (optional) Either a boolean, controlling whether the
            server's TLS certificate is verified, or a string giving the path
            to a CA bundle to use.
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def close(self):
        """Release adapter-specific resources.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class HTTPAdapter(BaseAdapter):
|
||||
"""The built-in HTTP Adapter for urllib3.
|
||||
|
||||
Provides a general-case interface for Requests sessions to contact HTTP and
|
||||
HTTPS urls by implementing the Transport Adapter interface. This class will
|
||||
usually be created by the :class:`Session <Session>` class under the
|
||||
covers.
|
||||
|
||||
:param pool_connections: The number of urllib3 connection pools to cache.
|
||||
:param pool_maxsize: The maximum number of connections to save in the pool.
|
||||
:param max_retries: The maximum number of retries each connection
|
||||
should attempt. Note, this applies only to failed DNS lookups, socket
|
||||
connections and connection timeouts, never to requests where data has
|
||||
made it to the server. By default, Requests does not retry failed
|
||||
connections. If you need granular control over the conditions under
|
||||
which we retry a request, import urllib3's ``Retry`` class and pass
|
||||
that instead.
|
||||
:param pool_block: Whether the connection pool should block for connections.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import requests
|
||||
>>> s = requests.Session()
|
||||
>>> a = requests.adapters.HTTPAdapter(max_retries=3)
|
||||
>>> s.mount('http://', a)
|
||||
"""
|
||||
__attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
|
||||
'_pool_block']
|
||||
|
||||
def __init__(self, pool_connections=DEFAULT_POOLSIZE,
             pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
             pool_block=DEFAULT_POOLBLOCK):
    """Store the pool settings and create the initial pool manager.

    :param pool_connections: number of urllib3 connection pools to cache.
    :param pool_maxsize: maximum number of connections kept per pool.
    :param max_retries: retry setting; the default becomes
        ``Retry(0, read=False)``, anything else goes through
        ``Retry.from_int``.
    :param pool_block: whether the pool should block for connections.
    """
    self.max_retries = (
        Retry(0, read=False)
        if max_retries == DEFAULT_RETRIES
        else Retry.from_int(max_retries)
    )
    self.config = {}
    self.proxy_manager = {}

    super(HTTPAdapter, self).__init__()

    # Kept on the instance so the adapter can be pickled and rebuilt.
    self._pool_connections = pool_connections
    self._pool_maxsize = pool_maxsize
    self._pool_block = pool_block

    self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
|
||||
|
||||
def __getstate__(self):
    """Return the picklable state: every name in ``__attrs__`` (``None`` if unset)."""
    state = {}
    for attr in self.__attrs__:
        state[attr] = getattr(self, attr, None)
    return state
|
||||
|
||||
def __setstate__(self, state):
    """Restore pickled attributes and rebuild the pool manager."""
    # Can't handle by adding 'proxy_manager' to self.__attrs__ because
    # self.poolmanager uses a lambda function, which isn't pickleable.
    self.proxy_manager = {}
    self.config = {}

    for attr in state:
        setattr(self, attr, state[attr])

    self.init_poolmanager(
        self._pool_connections,
        self._pool_maxsize,
        block=self._pool_block,
    )
|
||||
|
||||
def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
    """Create the urllib3 PoolManager used by this adapter.

    Not meant to be called from user code; exposed so subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param connections: The number of urllib3 connection pools to cache.
    :param maxsize: The maximum number of connections to save in the pool.
    :param block: Block when no free connections are available.
    :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
    """
    # Remember the settings so they survive pickling.
    self._pool_connections = connections
    self._pool_maxsize = maxsize
    self._pool_block = block

    manager = PoolManager(num_pools=connections, maxsize=maxsize,
                          block=block, strict=True, **pool_kwargs)
    self.poolmanager = manager
|
||||
|
||||
def proxy_manager_for(self, proxy, **proxy_kwargs):
    """Return the urllib3 ProxyManager for ``proxy``, creating it on first use.

    Not meant to be called from user code; exposed so subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param proxy: The proxy to return a urllib3 ProxyManager for.
    :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
    :returns: ProxyManager
    :rtype: urllib3.ProxyManager
    """
    # Managers are cached per proxy URL.
    if proxy in self.proxy_manager:
        return self.proxy_manager[proxy]

    if proxy.lower().startswith('socks'):
        username, password = get_auth_from_url(proxy)
        manager = SOCKSProxyManager(
            proxy,
            username=username,
            password=password,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs
        )
    else:
        proxy_headers = self.proxy_headers(proxy)
        manager = proxy_from_url(
            proxy,
            proxy_headers=proxy_headers,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs
        )

    self.proxy_manager[proxy] = manager
    return manager
|
||||
|
||||
def cert_verify(self, conn, url, verify, cert):
    """Configure TLS verification and the client certificate on ``conn``.

    Not meant to be called from user code; exposed so subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param conn: The urllib3 connection object associated with the cert.
    :param url: The requested URL.
    :param verify: Either a boolean, controlling whether the server's TLS
        certificate is verified, or a string giving the path to a CA bundle
        to use.
    :param cert: The SSL certificate to verify: a path, or a
        ``(cert, key)`` pair.
    :raises IOError: when the CA bundle, certificate file or key file cannot
        be found.
    """
    wants_verification = url.lower().startswith('https') and verify
    if wants_verification:
        # Allow self-specified cert location; otherwise use the default bundle.
        if verify is True:
            cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)
        else:
            cert_loc = verify

        if not cert_loc or not os.path.exists(cert_loc):
            raise IOError("Could not find a suitable TLS CA certificate bundle, "
                          "invalid path: {}".format(cert_loc))

        conn.cert_reqs = 'CERT_REQUIRED'

        # A directory is passed as a CA directory, anything else as a bundle file.
        if os.path.isdir(cert_loc):
            conn.ca_cert_dir = cert_loc
        else:
            conn.ca_certs = cert_loc
    else:
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None
        conn.ca_cert_dir = None

    if not cert:
        return

    if isinstance(cert, basestring):
        conn.cert_file = cert
        conn.key_file = None
    else:
        conn.cert_file = cert[0]
        conn.key_file = cert[1]

    if conn.cert_file and not os.path.exists(conn.cert_file):
        raise IOError("Could not find the TLS certificate file, "
                      "invalid path: {}".format(conn.cert_file))
    if conn.key_file and not os.path.exists(conn.key_file):
        raise IOError("Could not find the TLS key file, "
                      "invalid path: {}".format(conn.key_file))
|
||||
|
||||
def build_response(self, req, resp):
    """Wrap a urllib3 response in a :class:`Response <requests.Response>`.

    Not meant to be called from user code; exposed so subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response.
    :param resp: The urllib3 response object.
    :rtype: requests.Response
    """
    response = Response()

    # Fallback to None if there's no status_code, for whatever reason.
    response.status_code = getattr(resp, 'status', None)

    # Make headers case-insensitive.
    raw_headers = getattr(resp, 'headers', {})
    response.headers = CaseInsensitiveDict(raw_headers)

    # Set encoding.
    response.encoding = get_encoding_from_headers(response.headers)
    response.raw = resp
    response.reason = response.raw.reason

    # The response URL is always text, even if the request URL was bytes.
    request_url = req.url
    response.url = request_url.decode('utf-8') if isinstance(request_url, bytes) else request_url

    # Add new cookies from the server.
    extract_cookies_to_jar(response.cookies, req, resp)

    # Give the Response some context.
    response.request = req
    response.connection = self

    return response
|
||||
|
||||
def get_connection(self, url, proxies=None):
    """Return a urllib3 connection for ``url``, via a proxy when one applies.

    Not meant to be called from user code; exposed so subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param url: The URL to connect to.
    :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
    :rtype: urllib3.ConnectionPool
    :raises InvalidProxyURL: when the selected proxy URL has no host.
    """
    proxy = select_proxy(url, proxies)

    if not proxy:
        # Only scheme should be lower case
        url = urlparse(url).geturl()
        return self.poolmanager.connection_from_url(url)

    proxy = prepend_scheme_if_needed(proxy, 'http')
    if not parse_url(proxy).host:
        raise InvalidProxyURL("Please check proxy URL. It is malformed"
                              " and could be missing the host.")
    return self.proxy_manager_for(proxy).connection_from_url(url)
|
||||
|
||||
def close(self):
    """Dispose of internal state.

    Clears the PoolManager and every cached ProxyManager, which closes any
    pooled connections.
    """
    self.poolmanager.clear()
    for manager in self.proxy_manager.values():
        manager.clear()
|
||||
|
||||
def request_url(self, request, proxies):
    """Work out which URL form goes on the request line.

    A message sent through an HTTP proxy must carry the full URL; in every
    other case only the path portion of the URL is used.

    Exposed for subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`; user code should
    not call it.

    :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
    :rtype: str
    """
    proxy = select_proxy(request.url, proxies)
    target_scheme = urlparse(request.url).scheme

    # A SOCKS proxy is exempt from the full-URL rule.
    via_socks = False
    if proxy:
        via_socks = urlparse(proxy).scheme.lower().startswith('socks')

    path_only = request.path_url
    send_full_url = proxy and target_scheme != 'https' and not via_socks

    if send_full_url:
        return urldefragauth(request.url)
    return path_only
|
||||
|
||||
def add_headers(self, request, **kwargs):
    """Hook for adding headers required by the connection.

    As of v2.0 this does nothing by default; it remains so that subclasses
    of :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.
    User code should not call it.

    :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
    :param kwargs: The keyword arguments from the call to send().
    """
|
||||
|
||||
def proxy_headers(self, proxy):
    """Build the headers to attach to any request sent through a proxy.

    Together with urllib3 this ensures the headers are sent to the proxy
    itself rather than inside a tunnelled request when CONNECT is used.

    Exposed for subclasses of
    :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`; user code should
    not call it.

    :param proxy: The url of the proxy being used for this request.
    :rtype: dict
    """
    username, password = get_auth_from_url(proxy)

    # No username in the proxy URL: nothing to add.
    if not username:
        return {}

    return {'Proxy-Authorization': _basic_auth_str(username, password)}
|
||||
|
||||
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
    """Sends PreparedRequest object. Returns Response object.

    :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
    :param stream: (optional) Whether to stream the request content.
    :param timeout: (optional) How long to wait for the server to send
        data before giving up, as a float, or a :ref:`(connect timeout,
        read timeout) <timeouts>` tuple.
    :type timeout: float or tuple or urllib3 Timeout object
    :param verify: (optional) Either a boolean, in which case it controls whether
        we verify the server's TLS certificate, or a string, in which case it
        must be a path to a CA bundle to use
    :param cert: (optional) Any user-provided SSL certificate to be trusted.
    :param proxies: (optional) The proxies dictionary to apply to the request.
    :raises InvalidURL: if no connection can be selected for the request URL.
    :raises ValueError: if ``timeout`` is a tuple that is not a (connect, read) pair.
    :raises ConnectTimeout: if retries were exhausted by a connect timeout.
    :raises RetryError: if retries were exhausted by a response error.
    :raises ProxyError: if the proxy could not be used.
    :raises SSLError: if an SSL error occurred.
    :raises ReadTimeout: if reading the response timed out.
    :raises ConnectionError: for any other protocol, socket or pool failure.
    :rtype: requests.Response
    """

    try:
        conn = self.get_connection(request.url, proxies)
    except LocationValueError as e:
        raise InvalidURL(e, request=request)

    self.cert_verify(conn, request.url, verify, cert)
    url = self.request_url(request, proxies)
    self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)

    # Chunked transfer is used only when there is a body and its length
    # has not been declared.
    chunked = not (request.body is None or 'Content-Length' in request.headers)

    # Normalize every accepted timeout form into a TimeoutSauce object.
    if isinstance(timeout, tuple):
        try:
            connect, read = timeout
            timeout = TimeoutSauce(connect=connect, read=read)
        except ValueError:
            # this may raise a string formatting error.
            err = ("Invalid timeout {}. Pass a (connect, read) "
                   "timeout tuple, or a single float to set "
                   "both timeouts to the same value".format(timeout))
            raise ValueError(err)
    elif isinstance(timeout, TimeoutSauce):
        pass
    else:
        timeout = TimeoutSauce(connect=timeout, read=timeout)

    try:
        resp = conn.urlopen(
            method=request.method,
            url=url,
            body=request.body,
            headers=request.headers,
            redirect=False,
            assert_same_host=False,
            preload_content=False,
            decode_content=False,
            retries=self.max_retries,
            timeout=timeout,
            chunked=chunked
        )

    except (ProtocolError, socket.error) as err:
        raise ConnectionError(err, request=request)

    except MaxRetryError as e:
        # Translate the underlying reason into the matching exception.
        if isinstance(e.reason, ConnectTimeoutError):
            # TODO: Remove this in 3.0.0: see #2811
            if not isinstance(e.reason, NewConnectionError):
                raise ConnectTimeout(e, request=request)

        if isinstance(e.reason, ResponseError):
            raise RetryError(e, request=request)

        if isinstance(e.reason, _ProxyError):
            raise ProxyError(e, request=request)

        if isinstance(e.reason, _SSLError):
            # This branch is for urllib3 v1.22 and later.
            raise SSLError(e, request=request)

        raise ConnectionError(e, request=request)

    except ClosedPoolError as e:
        raise ConnectionError(e, request=request)

    except _ProxyError as e:
        # Attach the request here too, as every other handler in this
        # method does.
        raise ProxyError(e, request=request)

    except (_SSLError, _HTTPError) as e:
        if isinstance(e, _SSLError):
            # This branch is for urllib3 versions earlier than v1.22
            raise SSLError(e, request=request)
        elif isinstance(e, ReadTimeoutError):
            raise ReadTimeout(e, request=request)
        else:
            raise

    return self.build_response(request, resp)
|
|
@ -0,0 +1,161 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.api
|
||||
~~~~~~~~~~~~
|
||||
|
||||
This module implements the Requests API.
|
||||
|
||||
:copyright: (c) 2012 by Kenneth Reitz.
|
||||
:license: Apache2, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
from . import sessions
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
    """Build a :class:`Request <Request>` and send it through a throwaway session.

    :param method: method for the new :class:`Request` object: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``, ``PATCH``, or ``DELETE``.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
        or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
        defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
        to add for the file.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How many seconds to wait for the server to send data
        before giving up, as a float, or a :ref:`(connect timeout, read
        timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Enable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to ``True``.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) Either a boolean, in which case it controls whether we verify
        the server's TLS certificate, or a string, in which case it must be a path
        to a CA bundle to use. Defaults to ``True``.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'https://httpbin.org/get')
      >>> req
      <Response [200]>
    """

    # The session is used as a context manager so that it is always closed;
    # this avoids leaving sockets open, which can trigger a ResourceWarning
    # in some cases and look like a memory leak in others.
    with sessions.Session() as session:
        response = session.request(method=method, url=url, **kwargs)
    return response
|
||||
|
||||
|
||||
def get(url, params=None, **kwargs):
    r"""Issue a GET request.

    Redirects are followed unless the caller passes ``allow_redirects``.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary, list of tuples or bytes to send
        in the query string for the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True

    response = request('get', url, params=params, **kwargs)
    return response
|
||||
|
||||
|
||||
def options(url, **kwargs):
    r"""Issue an OPTIONS request.

    Redirects are followed unless the caller passes ``allow_redirects``.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True

    response = request('options', url, **kwargs)
    return response
|
||||
|
||||
|
||||
def head(url, **kwargs):
    r"""Issue a HEAD request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes. If
        `allow_redirects` is not provided, it will be set to `False` (as
        opposed to the default :meth:`request` behavior).
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    # Unlike the other helpers, HEAD does not follow redirects by default.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False

    response = request('head', url, **kwargs)
    return response
|
||||
|
||||
|
||||
def post(url, data=None, json=None, **kwargs):
    r"""Issue a POST request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    response = request('post', url, data=data, json=json, **kwargs)
    return response
|
||||
|
||||
|
||||
def put(url, data=None, **kwargs):
    r"""Issue a PUT request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`. It is not a named parameter of this function, so
        it is forwarded through ``**kwargs``.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    response = request('put', url, data=data, **kwargs)
    return response
|
||||
|
||||
|
||||
def patch(url, data=None, **kwargs):
    r"""Issue a PATCH request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the
        :class:`Request`. It is not a named parameter of this function, so
        it is forwarded through ``**kwargs``.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    response = request('patch', url, data=data, **kwargs)
    return response
|
||||
|
||||
|
||||
def delete(url, **kwargs):
    r"""Issue a DELETE request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """

    response = request('delete', url, **kwargs)
    return response
|
|
@ -0,0 +1,305 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.auth
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
This module contains the authentication handlers for Requests.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
import threading
|
||||
import warnings
|
||||
|
||||
from base64 import b64encode
|
||||
|
||||
from .compat import urlparse, str, basestring
|
||||
from .cookies import extract_cookies_to_jar
|
||||
from ._internal_utils import to_native_string
|
||||
from .utils import parse_dict_header
|
||||
|
||||
CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
|
||||
CONTENT_TYPE_MULTI_PART = 'multipart/form-data'
|
||||
|
||||
|
||||
def _basic_auth_str(username, password):
    """Build the value of a Basic auth header from a username and password.

    Text credentials are encoded as latin1, joined with ``:``,
    base64-encoded and prefixed with ``'Basic '``.

    :param username: the username; non-string values are converted with a DeprecationWarning.
    :param password: the password; non-string values are converted with a DeprecationWarning.
    :rtype: str
    """

    # "I want us to put a big-ol' comment on top of it that
    # says that this behaviour is dumb but we need to preserve
    # it because people are relying on it."
    #    - Lukasa
    #
    # The two conversions below exist solely to maintain backwards
    # compatibility for things like ints. This will be removed in 3.0.0.
    if not isinstance(username, basestring):
        warnings.warn(
            "Non-string usernames will no longer be supported in Requests "
            "3.0.0. Please convert the object you've passed in ({!r}) to "
            "a string or bytes object in the near future to avoid "
            "problems.".format(username),
            category=DeprecationWarning,
        )
        username = str(username)

    if not isinstance(password, basestring):
        # The message reports only the type of the password, not its value.
        warnings.warn(
            "Non-string passwords will no longer be supported in Requests "
            "3.0.0. Please convert the object you've passed in ({!r}) to "
            "a string or bytes object in the near future to avoid "
            "problems.".format(type(password)),
            category=DeprecationWarning,
        )
        password = str(password)
    # -- End Removal --

    if isinstance(username, str):
        username = username.encode('latin1')

    if isinstance(password, str):
        password = password.encode('latin1')

    credentials = b':'.join((username, password))
    token = b64encode(credentials).strip()

    return 'Basic ' + to_native_string(token)
|
||||
|
||||
|
||||
class AuthBase(object):
    """Base class that all auth implementations derive from.

    Subclasses override ``__call__``, which receives the request object
    ``r``. The implementation here only raises ``NotImplementedError``.
    """

    def __call__(self, r):
        # Reached only when a subclass has not overridden __call__.
        raise NotImplementedError('Auth hooks must be callable.')
|
||||
|
||||
|
||||
class HTTPBasicAuth(AuthBase):
    """Auth hook that sets a Basic ``Authorization`` header on a request."""

    def __init__(self, username, password):
        self.username, self.password = username, password

    def __eq__(self, other):
        # Attributes missing on ``other`` are read as None, so comparing
        # with an unrelated object does not raise.
        same_username = self.username == getattr(other, 'username', None)
        same_password = self.password == getattr(other, 'password', None)
        return all((same_username, same_password))

    def __ne__(self, other):
        return not self == other

    def __call__(self, r):
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Authorization'] = header_value
        return r
|
||||
|
||||
|
||||
class HTTPProxyAuth(HTTPBasicAuth):
    """Auth hook that sets a Basic ``Proxy-Authorization`` header on a request."""

    def __call__(self, r):
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Proxy-Authorization'] = header_value
        return r
|
||||
|
||||
|
||||
class HTTPDigestAuth(AuthBase):
    """Attaches HTTP Digest Authentication to the given Request object.

    Challenge data, the nonce counter and the retry counter are kept in
    thread-local storage.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Keep state in per-thread local storage
        self._thread_local = threading.local()

    def init_per_thread_state(self):
        """Create the per-thread state attributes, once per thread."""
        # Ensure state is initialized just once per-thread
        if not hasattr(self._thread_local, 'init'):
            self._thread_local.init = True
            self._thread_local.last_nonce = ''
            self._thread_local.nonce_count = 0
            self._thread_local.chal = {}
            self._thread_local.pos = None
            self._thread_local.num_401_calls = None

    def build_digest_header(self, method, url):
        """Build the ``Digest ...`` header value for the stored challenge.

        Returns ``None`` when the challenge names an unsupported algorithm
        or offers a ``qop`` list that does not include ``auth``.

        :param method: HTTP method of the request being authenticated.
        :param url: URL of the request; its path and query form the digest URI.
        :rtype: str
        """

        realm = self._thread_local.chal['realm']
        nonce = self._thread_local.chal['nonce']
        qop = self._thread_local.chal.get('qop')
        algorithm = self._thread_local.chal.get('algorithm')
        opaque = self._thread_local.chal.get('opaque')

        if algorithm is None:
            _algorithm = 'MD5'
        else:
            _algorithm = algorithm.upper()

        # One table replaces four near-identical nested helpers; it relies
        # on hashlib being imported at the top level.
        digest_constructors = {
            'MD5': hashlib.md5,
            'MD5-SESS': hashlib.md5,
            'SHA': hashlib.sha1,
            'SHA-256': hashlib.sha256,
            'SHA-512': hashlib.sha512,
        }
        make_digest = digest_constructors.get(_algorithm)
        if make_digest is None:
            # Unsupported algorithm: no header can be built.
            return None

        def hash_utf8(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return make_digest(x).hexdigest()

        def KD(s, d):
            return hash_utf8("%s:%s" % (s, d))

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        #: path is request-uri defined in RFC 2616 which should not be empty
        path = p_parsed.path or "/"
        if p_parsed.query:
            path += '?' + p_parsed.query

        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)

        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # The nonce counter restarts whenever the server issues a new nonce.
        if nonce == self._thread_local.last_nonce:
            self._thread_local.nonce_count += 1
        else:
            self._thread_local.nonce_count = 1
        ncvalue = '%08x' % self._thread_local.nonce_count
        s = str(self._thread_local.nonce_count).encode('utf-8')
        s += nonce.encode('utf-8')
        s += time.ctime().encode('utf-8')
        s += os.urandom(8)

        cnonce = (hashlib.sha1(s).hexdigest()[:16])
        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if not qop:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif 'auth' in [option.strip() for option in qop.split(',')]:
            # The qop value is a comma-separated list that may contain
            # whitespace around its items (e.g. "auth-int, auth"), so each
            # item is stripped before comparison.
            noncebit = "%s:%s:%s:%s:%s" % (
                nonce, ncvalue, cnonce, 'auth', HA2
            )
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        self._thread_local.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (self.username, realm, nonce, path, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if algorithm:
            base += ', algorithm="%s"' % algorithm
        if entdig:
            base += ', digest="%s"' % entdig
        if qop:
            base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

        return 'Digest %s' % (base)

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self._thread_local.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """
        Takes the given response and tries digest-auth, if needed.

        :param r: the response passed to the ``response`` hook.
        :param kwargs: forwarded to ``r.connection.send`` when the request is resent.
        :rtype: requests.Response
        """

        # If response is not 4xx, do not auth
        # See https://github.com/psf/requests/issues/3772
        if not 400 <= r.status_code < 500:
            self._thread_local.num_401_calls = 1
            return r

        if self._thread_local.pos is not None:
            # Rewind the file position indicator of the body to where
            # it was to resend the request.
            r.request.body.seek(self._thread_local.pos)
        s_auth = r.headers.get('www-authenticate', '')

        # Retry with credentials once per challenge.
        if 'digest' in s_auth.lower() and self._thread_local.num_401_calls < 2:

            self._thread_local.num_401_calls += 1
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self._thread_local.chal = parse_dict_header(pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.close()
            prep = r.request.copy()
            extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            prep.headers['Authorization'] = self.build_digest_header(
                prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            _r.history.append(r)
            _r.request = prep

            return _r

        self._thread_local.num_401_calls = 1
        return r

    def __call__(self, r):
        """Prepare request ``r`` for digest auth and register the response hooks."""
        # Initialize per-thread state, if needed
        self.init_per_thread_state()
        # If we have a saved nonce, skip the 401
        if self._thread_local.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
        try:
            self._thread_local.pos = r.body.tell()
        except AttributeError:
            # In the case of HTTPDigestAuth being reused and the body of
            # the previous request was a file-like object, pos has the
            # file position of the previous body. Ensure it's set to
            # None.
            self._thread_local.pos = None
        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        self._thread_local.num_401_calls = 1

        return r

    def __eq__(self, other):
        # Attributes missing on ``other`` are read as None.
        return all([
            self.username == getattr(other, 'username', None),
            self.password == getattr(other, 'password', None)
        ])

    def __ne__(self, other):
        return not self == other
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.certs
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
This module returns the preferred default CA certificate bundle. There is
|
||||
only one — the one from the certifi package.
|
||||
|
||||
If you are packaging Requests, e.g., for a Linux distribution or a managed
|
||||
environment, you can change the definition of where() to return a separately
|
||||
packaged CA bundle.
|
||||
"""
|
||||
from certifi import where
|
||||
|
||||
# Run as a script: print whatever where() returns.
if __name__ == '__main__':
    print(where())
|
|
@ -0,0 +1,72 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.compat
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
This module handles import compatibility issues between Python 2 and
|
||||
Python 3.
|
||||
"""
|
||||
|
||||
import chardet
|
||||
|
||||
import sys
|
||||
|
||||
# -------
|
||||
# Pythons
|
||||
# -------
|
||||
|
||||
# Syntax sugar.
_ver = sys.version_info

#: Python 2.x?
is_py2 = (_ver[0] == 2)

#: Python 3.x?
is_py3 = (_ver[0] == 3)

# Use simplejson when it can be imported; otherwise fall back to the
# standard-library json module.
try:
    import simplejson as json
except ImportError:
    import json

# ---------
# Specifics
# ---------

# Each branch binds the same set of names so that the rest of the package
# can import them from here regardless of the interpreter's major version.
if is_py2:
    from urllib import (
        quote, unquote, quote_plus, unquote_plus, urlencode, getproxies,
        proxy_bypass, proxy_bypass_environment, getproxies_environment)
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    # Keep OrderedDict for backwards compatibility.
    from collections import Callable, Mapping, MutableMapping, OrderedDict


    # Order matters: builtin_str and bytes capture the native ``str``
    # before ``str`` is rebound to ``unicode``.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)
    integer_types = (int, long)

elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    # Keep OrderedDict for backwards compatibility.
    from collections import OrderedDict
    from collections.abc import Callable, Mapping, MutableMapping

    # ``basestring`` is a tuple here, so isinstance() checks against it
    # accept both str and bytes.
    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
    integer_types = (int,)
|
|
@ -0,0 +1,549 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.cookies
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
Compatibility code to be able to use `cookielib.CookieJar` with requests.
|
||||
|
||||
requests.utils imports from here, so be careful with imports.
|
||||
"""
|
||||
|
||||
import copy
|
||||
import time
|
||||
import calendar
|
||||
|
||||
from ._internal_utils import to_native_string
|
||||
from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping
|
||||
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
import dummy_threading as threading
|
||||
|
||||
|
||||
class MockRequest(object):
    """Present a `requests.Request` through the `urllib2.Request` interface.

    `cookielib.CookieJar` expects this interface in order to correctly
    manage cookie policies, i.e., determine whether a cookie can be set,
    given the domains of the request and the cookie.

    The wrapped request is treated as read-only. Headers added by the jar
    are collected separately; the client retrieves them with
    `get_new_headers()` and interprets them appropriately. You probably
    want `get_cookie_header`, defined below.
    """

    def __init__(self, request):
        self._r = request
        self._new_headers = {}
        self.type = urlparse(self._r.url).scheme

    def get_type(self):
        return self.type

    def get_host(self):
        return urlparse(self._r.url).netloc

    def get_origin_req_host(self):
        return self.get_host()

    def get_full_url(self):
        request_headers = self._r.headers
        # Only return the response's URL if the user hadn't set the Host
        # header
        if not request_headers.get('Host'):
            return self._r.url

        # A Host header was set: rebuild the URL with it in place of the
        # network location, keeping every other component.
        host = to_native_string(request_headers['Host'], encoding='utf-8')
        parts = urlparse(self._r.url)
        return urlunparse([
            parts.scheme, host, parts.path, parts.params, parts.query,
            parts.fragment
        ])

    def is_unverifiable(self):
        return True

    def has_header(self, name):
        sources = (self._r.headers, self._new_headers)
        return any(name in source for source in sources)

    def get_header(self, name, default=None):
        # The request's own headers take precedence over the new ones.
        fallback = self._new_headers.get(name, default)
        return self._r.headers.get(name, fallback)

    def add_header(self, key, val):
        """cookielib has no legitimate use for this method; add it back if you find one."""
        raise NotImplementedError("Cookie headers should be added with add_unredirected_header()")

    def add_unredirected_header(self, name, value):
        self._new_headers[name] = value

    def get_new_headers(self):
        return self._new_headers

    @property
    def unverifiable(self):
        return self.is_unverifiable()

    @property
    def origin_req_host(self):
        return self.get_origin_req_host()

    @property
    def host(self):
        return self.get_host()
|
||||
|
||||
|
||||
class MockResponse(object):
    """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.

    ...what? Basically, expose the parsed HTTP headers from the server response
    the way `cookielib` expects to see them.
    """

    def __init__(self, headers):
        """Make a MockResponse for `cookielib` to read.

        :param headers: a httplib.HTTPMessage or analogous carrying the headers
        """
        self._headers = headers

    def info(self):
        """Return the wrapped headers object unchanged."""
        return self._headers

    def getheaders(self, name):
        """Return the result of ``getheaders(name)`` on the wrapped headers.

        The result of the delegated call was previously discarded, so this
        method always returned ``None``.

        :param name: header name to look up.
        """
        return self._headers.getheaders(name)
|
||||
|
||||
|
||||
def extract_cookies_to_jar(jar, request, response):
    """Copy the cookies carried by ``response`` into ``jar``.

    Nothing happens when ``response`` has no ``_original_response``
    attribute or the attribute is falsy.

    :param jar: cookielib.CookieJar (not necessarily a RequestsCookieJar)
    :param request: our own requests.Request object
    :param response: urllib3.HTTPResponse object
    """
    # the _original_response field is the wrapped httplib.HTTPResponse object,
    original = getattr(response, '_original_response', None)
    if not original:
        return

    mock_request = MockRequest(request)
    # pull out the HTTPMessage with the headers and put it in the mock:
    mock_response = MockResponse(original.msg)
    jar.extract_cookies(mock_response, mock_request)
|
||||
|
||||
|
||||
def get_cookie_header(jar, request):
    """
    Ask ``jar`` for the Cookie header to send with ``request``.

    Returns the header string, or None when the jar added no ``Cookie``
    header.

    :rtype: str
    """
    mock_request = MockRequest(request)
    jar.add_cookie_header(mock_request)
    new_headers = mock_request.get_new_headers()
    return new_headers.get('Cookie')
|
||||
|
||||
|
||||
def remove_cookie_by_name(cookiejar, name, domain=None, path=None):
    """Remove every cookie called ``name`` from ``cookiejar``.

    All domains and paths are affected unless ``domain`` and/or ``path``
    are given, in which case only cookies matching them are removed.

    Wraps CookieJar.clear(), is O(n).
    """
    def is_target(cookie):
        return (
            cookie.name == name
            and (domain is None or domain == cookie.domain)
            and (path is None or path == cookie.path)
        )

    # Collect the matches first so the jar is not modified while it is
    # being iterated.
    doomed = [
        (cookie.domain, cookie.path, cookie.name)
        for cookie in cookiejar
        if is_target(cookie)
    ]

    for cookie_domain, cookie_path, cookie_name in doomed:
        cookiejar.clear(cookie_domain, cookie_path, cookie_name)
|
||||
|
||||
|
||||
class CookieConflictError(RuntimeError):
    """Raised when more than one cookie in the jar meets the given criteria.

    Use .get and .set and include domain and path args in order to be more specific.
    """
|
||||
|
||||
|
||||
class RequestsCookieJar(cookielib.CookieJar, MutableMapping):
    """Compatibility class; is a cookielib.CookieJar, but exposes a dict
    interface.

    This is the CookieJar we create by default for requests and sessions that
    don't specify one, since some clients may expect response.cookies and
    session.cookies to support dict operations.

    Requests does not use the dict interface internally; it's just for
    compatibility with external client code. All requests code should work
    out of the box with externally provided instances of ``CookieJar``, e.g.
    ``LWPCookieJar`` and ``FileCookieJar``.

    Unlike a regular CookieJar, this class is pickleable.

    .. warning:: dictionary operations that are normally O(1) may be O(n).
    """

    def get(self, name, default=None, domain=None, path=None):
        """Dict-like get() that also supports optional domain and path args in
        order to resolve naming collisions from using one cookie jar over
        multiple domains.

        Returns ``default`` when no cookie matches. A
        :class:`CookieConflictError` from the lookup is not caught here.

        .. warning:: operation is O(n), not O(1).
        """
        try:
            return self._find_no_duplicates(name, domain, path)
        except KeyError:
            return default

    def set(self, name, value, **kwargs):
        """Dict-like set() that also supports optional domain and path args in
        order to resolve naming collisions from using one cookie jar over
        multiple domains.

        :param name: cookie name.
        :param value: cookie value, a ``Morsel``, or None to remove the cookie.
        :param kwargs: forwarded to ``create_cookie`` (``domain`` and ``path``
            are also used to narrow a removal).
        :return: the cookie that was stored, or None after a removal.
        """
        # support client code that unsets cookies by assignment of a None value:
        if value is None:
            remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path'))
            return

        if isinstance(value, Morsel):
            c = morsel_to_cookie(value)
        else:
            c = create_cookie(name, value, **kwargs)
        self.set_cookie(c)
        return c

    def iterkeys(self):
        """Dict-like iterkeys() that returns an iterator of names of cookies
        from the jar.

        .. seealso:: itervalues() and iteritems().
        """
        for cookie in iter(self):
            yield cookie.name

    def keys(self):
        """Dict-like keys() that returns a list of names of cookies from the
        jar.

        .. seealso:: values() and items().
        """
        return list(self.iterkeys())

    def itervalues(self):
        """Dict-like itervalues() that returns an iterator of values of cookies
        from the jar.

        .. seealso:: iterkeys() and iteritems().
        """
        for cookie in iter(self):
            yield cookie.value

    def values(self):
        """Dict-like values() that returns a list of values of cookies from the
        jar.

        .. seealso:: keys() and items().
        """
        return list(self.itervalues())

    def iteritems(self):
        """Dict-like iteritems() that returns an iterator of name-value tuples
        from the jar.

        .. seealso:: iterkeys() and itervalues().
        """
        for cookie in iter(self):
            yield cookie.name, cookie.value

    def items(self):
        """Dict-like items() that returns a list of name-value tuples from the
        jar. Allows client-code to call ``dict(RequestsCookieJar)`` and get a
        vanilla python dict of key value pairs.

        .. seealso:: keys() and values().
        """
        return list(self.iteritems())

    def list_domains(self):
        """Utility method to list all the domains in the jar.

        Each domain appears once, in the order it is first seen.
        """
        domains = []
        for cookie in iter(self):
            if cookie.domain not in domains:
                domains.append(cookie.domain)
        return domains

    def list_paths(self):
        """Utility method to list all the paths in the jar.

        Each path appears once, in the order it is first seen.
        """
        paths = []
        for cookie in iter(self):
            if cookie.path not in paths:
                paths.append(cookie.path)
        return paths

    def multiple_domains(self):
        """Return True as soon as a cookie's (non-None) domain has already been
        seen on an earlier cookie in the jar; return False otherwise.

        NOTE(review): the method name suggests "more than one distinct
        domain", but the check below fires on a *repeated* domain. The
        behaviour is left untouched here because callers may rely on it --
        confirm the intent before changing it.

        :rtype: bool
        """
        domains = []
        for cookie in iter(self):
            if cookie.domain is not None and cookie.domain in domains:
                return True
            domains.append(cookie.domain)
        return False  # no domain was seen twice

    def get_dict(self, domain=None, path=None):
        """Takes as an argument an optional domain and path and returns a plain
        old Python dict of name-value pairs of cookies that meet the
        requirements.

        When several matching cookies share a name, the one iterated last
        wins.

        :rtype: dict
        """
        dictionary = {}
        for cookie in iter(self):
            if (
                (domain is None or cookie.domain == domain) and
                (path is None or cookie.path == path)
            ):
                dictionary[cookie.name] = cookie.value
        return dictionary

    def __contains__(self, name):
        """Dict-like membership test on cookie names.

        A name that matches several cookies (``CookieConflictError`` from the
        inherited lookup) still counts as present.
        """
        try:
            return super(RequestsCookieJar, self).__contains__(name)
        except CookieConflictError:
            return True

    def __getitem__(self, name):
        """Dict-like __getitem__() for compatibility with client code. Throws
        exception if there are more than one cookie with name. In that case,
        use the more explicit get() method instead.

        .. warning:: operation is O(n), not O(1).
        """
        return self._find_no_duplicates(name)

    def __setitem__(self, name, value):
        """Dict-like __setitem__ for compatibility with client code.

        Simply forwards to :meth:`set` with no domain/path; use :meth:`set`
        directly when those need to be specified.
        """
        self.set(name, value)

    def __delitem__(self, name):
        """Deletes a cookie given a name, over all domains and paths, by
        calling the module-level ``remove_cookie_by_name()``.
        """
        remove_cookie_by_name(self, name)

    def set_cookie(self, cookie, *args, **kwargs):
        """Store ``cookie`` in the jar.

        When the value is string-like and both starts and ends with a double
        quote, every backslash-quote sequence is stripped from it first.
        """
        if hasattr(cookie.value, 'startswith') and cookie.value.startswith('"') and cookie.value.endswith('"'):
            cookie.value = cookie.value.replace('\\"', '')
        return super(RequestsCookieJar, self).set_cookie(cookie, *args, **kwargs)

    def update(self, other):
        """Updates this jar with cookies from another CookieJar or dict-like"""
        if isinstance(other, cookielib.CookieJar):
            # Store shallow copies so the two jars do not share Cookie objects.
            for cookie in other:
                self.set_cookie(copy.copy(cookie))
        else:
            super(RequestsCookieJar, self).update(other)

    def _find(self, name, domain=None, path=None):
        """Requests uses this method internally to get cookie values.

        If there are conflicting cookies, _find arbitrarily chooses one.
        See _find_no_duplicates if you want an exception thrown if there are
        conflicting cookies.

        :param name: a string containing name of cookie
        :param domain: (optional) string containing domain of cookie
        :param path: (optional) string containing path of cookie
        :raises KeyError: if cookie is not found
        :return: cookie.value
        """
        for cookie in iter(self):
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        return cookie.value

        raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path))

    def _find_no_duplicates(self, name, domain=None, path=None):
        """Both ``__getitem__`` and ``get`` call this function: it's never
        used elsewhere in Requests.

        :param name: a string containing name of cookie
        :param domain: (optional) string containing domain of cookie
        :param path: (optional) string containing path of cookie
        :raises KeyError: if cookie is not found
        :raises CookieConflictError: if there are multiple cookies
            that match name and optionally domain and path
        :return: cookie.value
        """
        toReturn = None
        for cookie in iter(self):
            if cookie.name == name:
                if domain is None or cookie.domain == domain:
                    if path is None or cookie.path == path:
                        if toReturn is not None:  # if there are multiple cookies that meet passed in criteria
                            raise CookieConflictError('There are multiple cookies with name, %r' % (name))
                        toReturn = cookie.value  # we will eventually return this as long as no cookie conflict

        # Compare against None rather than testing truthiness: a cookie whose
        # value is the empty string is present and must be returned, not
        # reported as missing.
        if toReturn is not None:
            return toReturn
        raise KeyError('name=%r, domain=%r, path=%r' % (name, domain, path))

    def __getstate__(self):
        """Unlike a normal CookieJar, this class is pickleable."""
        state = self.__dict__.copy()
        # remove the unpickleable RLock object
        state.pop('_cookies_lock')
        return state

    def __setstate__(self, state):
        """Unlike a normal CookieJar, this class is pickleable."""
        self.__dict__.update(state)
        # The lock is dropped by __getstate__, so recreate it when absent.
        if '_cookies_lock' not in self.__dict__:
            self._cookies_lock = threading.RLock()

    def copy(self):
        """Return a copy of this RequestsCookieJar."""
        new_cj = RequestsCookieJar()
        new_cj.set_policy(self.get_policy())
        new_cj.update(self)
        return new_cj

    def get_policy(self):
        """Return the CookiePolicy instance used."""
        return self._policy
|
||||
|
||||
|
||||
def _copy_cookie_jar(jar):
|
||||
if jar is None:
|
||||
return None
|
||||
|
||||
if hasattr(jar, 'copy'):
|
||||
# We're dealing with an instance of RequestsCookieJar
|
||||
return jar.copy()
|
||||
# We're dealing with a generic CookieJar instance
|
||||
new_jar = copy.copy(jar)
|
||||
new_jar.clear()
|
||||
for cookie in jar:
|
||||
new_jar.set_cookie(copy.copy(cookie))
|
||||
return new_jar
|
||||
|
||||
|
||||
def create_cookie(name, value, **kwargs):
    """Build a ``cookielib.Cookie`` from a partial description.

    Only ``name`` and ``value`` are required; every other Cookie field gets a
    default that ``kwargs`` may override. By default the cookie is set for
    the domain '' and path '/', i.e. sent on every request (this is sometimes
    called a "supercookie").

    :raises TypeError: if ``kwargs`` contains a key that is not a known field.
    """
    spec = dict(
        version=0,
        name=name,
        value=value,
        port=None,
        domain='',
        path='/',
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rest={'HttpOnly': None},
        rfc2109=False,
    )

    unknown = set(kwargs).difference(spec)
    if unknown:
        err = 'create_cookie() got unexpected keyword arguments: %s'
        raise TypeError(err % list(unknown))

    spec.update(kwargs)
    # Derive the "was it specified" flags from the final field values.
    spec.update(
        port_specified=bool(spec['port']),
        domain_specified=bool(spec['domain']),
        domain_initial_dot=spec['domain'].startswith('.'),
        path_specified=bool(spec['path']),
    )

    return cookielib.Cookie(**spec)
|
||||
|
||||
|
||||
def morsel_to_cookie(morsel):
    """Turn a ``Morsel`` into a Cookie holding its single key/value pair.

    The expiry is taken from ``max-age`` when set (now + max-age seconds),
    otherwise from ``expires`` parsed as ``'%a, %d-%b-%Y %H:%M:%S GMT'``;
    with neither it stays None.

    :raises TypeError: if ``max-age`` cannot be converted to an integer.
    """
    max_age = morsel['max-age']
    expires_text = morsel['expires']

    expiry = None
    if max_age:
        try:
            expiry = int(time.time() + int(max_age))
        except ValueError:
            raise TypeError('max-age: %s must be integer' % morsel['max-age'])
    elif expires_text:
        time_template = '%a, %d-%b-%Y %H:%M:%S GMT'
        parsed = time.strptime(expires_text, time_template)
        expiry = calendar.timegm(parsed)

    fields = dict(
        comment=morsel['comment'],
        comment_url=bool(morsel['comment']),
        discard=False,
        domain=morsel['domain'],
        expires=expiry,
        name=morsel.key,
        path=morsel['path'],
        port=None,
        rest={'HttpOnly': morsel['httponly']},
        rfc2109=False,
        secure=bool(morsel['secure']),
        value=morsel.value,
        version=morsel['version'] or 0,
    )
    return create_cookie(**fields)
|
||||
|
||||
|
||||
def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True):
    """Populate a CookieJar from a key/value dictionary and return it.

    :param cookie_dict: Dict of key/values to insert into CookieJar; None
        inserts nothing.
    :param cookiejar: (optional) A cookiejar to add the cookies to; a new
        RequestsCookieJar is created when omitted.
    :param overwrite: (optional) If False, names already present in the jar
        are skipped instead of being set again.
    :rtype: CookieJar
    """
    if cookiejar is None:
        cookiejar = RequestsCookieJar()

    if cookie_dict is None:
        return cookiejar

    # Snapshot of the names present before any insertion.
    existing_names = [item.name for item in cookiejar]
    for key in cookie_dict:
        if not overwrite and key in existing_names:
            continue
        cookiejar.set_cookie(create_cookie(key, cookie_dict[key]))

    return cookiejar
|
||||
|
||||
|
||||
def merge_cookies(cookiejar, cookies):
    """Merge ``cookies`` into ``cookiejar`` and return the jar.

    A dict is merged through ``cookiejar_from_dict`` without overwriting
    names already in the jar. A CookieJar is merged with the target's
    ``update`` method, falling back to per-cookie ``set_cookie`` when that
    raises AttributeError. Any other ``cookies`` value is ignored.

    :param cookiejar: CookieJar object to add the cookies to.
    :param cookies: Dictionary or CookieJar object to be added.
    :raises ValueError: if ``cookiejar`` is not a CookieJar.
    :rtype: CookieJar
    """
    if not isinstance(cookiejar, cookielib.CookieJar):
        raise ValueError('You can only merge into CookieJar')

    if isinstance(cookies, dict):
        return cookiejar_from_dict(
            cookies, cookiejar=cookiejar, overwrite=False)

    if isinstance(cookies, cookielib.CookieJar):
        try:
            cookiejar.update(cookies)
        except AttributeError:
            # Plain CookieJar targets have no update(); add one by one.
            for extra in cookies:
                cookiejar.set_cookie(extra)

    return cookiejar
|
|
@ -0,0 +1,126 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.exceptions
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This module contains the set of Requests' exceptions.
|
||||
"""
|
||||
from urllib3.exceptions import HTTPError as BaseHTTPError
|
||||
|
||||
|
||||
class RequestException(IOError):
    """Base error for an ambiguous failure that occurred while handling a
    request.
    """

    def __init__(self, *args, **kwargs):
        """Set up the exception, capturing optional `request` and `response`
        keyword arguments as attributes.

        When only a response is supplied and it carries a ``request``
        attribute, that request is adopted as ``self.request``. Remaining
        arguments go to the base class initializer.
        """
        response = kwargs.pop('response', None)
        request = kwargs.pop('request', None)

        if response is not None and not request and hasattr(response, 'request'):
            request = response.request

        self.response = response
        self.request = request
        super(RequestException, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class HTTPError(RequestException):
    """Raised when an HTTP error occurred."""
|
||||
|
||||
|
||||
class ConnectionError(RequestException):
    """Raised when a connection error occurred."""
|
||||
|
||||
|
||||
class ProxyError(ConnectionError):
    """Raised when a proxy error occurred (a kind of ConnectionError)."""
|
||||
|
||||
|
||||
class SSLError(ConnectionError):
    """Raised when an SSL error occurred (a kind of ConnectionError)."""
|
||||
|
||||
|
||||
class Timeout(RequestException):
    """Raised when the request timed out.

    This is the common base of
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout`, so catching it handles both.
    """
|
||||
|
||||
|
||||
class ConnectTimeout(ConnectionError, Timeout):
    """Raised when the request timed out while trying to connect to the
    remote server.

    Requests that produced this error are safe to retry.
    """
|
||||
|
||||
|
||||
class ReadTimeout(Timeout):
    """Raised when the server sent no data within the allotted time."""
|
||||
|
||||
|
||||
class URLRequired(RequestException):
    """Raised when a request is attempted without a valid URL."""
|
||||
|
||||
|
||||
class TooManyRedirects(RequestException):
    """Raised when there were too many redirects."""
|
||||
|
||||
|
||||
class MissingSchema(RequestException, ValueError):
    """Raised when the URL schema (e.g. http or https) is missing.

    Also a ``ValueError``.
    """
|
||||
|
||||
|
||||
class InvalidSchema(RequestException, ValueError):
    """Raised for an invalid URL schema; see defaults.py for valid schemas.

    Also a ``ValueError``.
    """
|
||||
|
||||
|
||||
class InvalidURL(RequestException, ValueError):
    """Raised when the URL provided was somehow invalid.

    Also a ``ValueError``.
    """
|
||||
|
||||
|
||||
class InvalidHeader(RequestException, ValueError):
    """Raised when the header value provided was somehow invalid.

    Also a ``ValueError``.
    """
|
||||
|
||||
|
||||
class InvalidProxyURL(InvalidURL):
    """Raised when the proxy URL provided is invalid (a kind of InvalidURL)."""
|
||||
|
||||
|
||||
class ChunkedEncodingError(RequestException):
    """Raised when the server declared chunked encoding but sent an invalid
    chunk.
    """
|
||||
|
||||
|
||||
class ContentDecodingError(RequestException, BaseHTTPError):
    """Raised when decoding the response content failed.

    Also derives from urllib3's ``HTTPError`` (imported as BaseHTTPError).
    """
|
||||
|
||||
|
||||
class StreamConsumedError(RequestException, TypeError):
    """Raised when the content for this response was already consumed.

    Also a ``TypeError``.
    """
|
||||
|
||||
|
||||
class RetryError(RequestException):
    """Raised when custom retries logic failed."""
|
||||
|
||||
|
||||
class UnrewindableBodyError(RequestException):
    """Raised when Requests encountered an error while trying to rewind a
    body.
    """
|
||||
|
||||
# Warnings
|
||||
|
||||
|
||||
class RequestsWarning(Warning):
    """Root of the warning categories issued by Requests."""
|
||||
|
||||
|
||||
class FileModeWarning(RequestsWarning, DeprecationWarning):
    """Warns that a file was opened in text mode, but Requests determined its
    binary length.
    """
|
||||
|
||||
|
||||
class RequestsDependencyWarning(RequestsWarning):
    """Warns that an imported dependency doesn't match the expected version
    range.
    """
|
|
@ -0,0 +1,119 @@
|
|||
"""Module containing bug report helper(s)."""
|
||||
from __future__ import print_function
|
||||
|
||||
import json
|
||||
import platform
|
||||
import sys
|
||||
import ssl
|
||||
|
||||
import idna
|
||||
import urllib3
|
||||
import chardet
|
||||
|
||||
from . import __version__ as requests_version
|
||||
|
||||
try:
|
||||
from urllib3.contrib import pyopenssl
|
||||
except ImportError:
|
||||
pyopenssl = None
|
||||
OpenSSL = None
|
||||
cryptography = None
|
||||
else:
|
||||
import OpenSSL
|
||||
import cryptography
|
||||
|
||||
|
||||
def _implementation():
|
||||
"""Return a dict with the Python implementation and version.
|
||||
|
||||
Provide both the name and the version of the Python implementation
|
||||
currently running. For example, on CPython 2.7.5 it will return
|
||||
{'name': 'CPython', 'version': '2.7.5'}.
|
||||
|
||||
This function works best on CPython and PyPy: in particular, it probably
|
||||
doesn't work for Jython or IronPython. Future investigation should be done
|
||||
to work out the correct shape of the code for those platforms.
|
||||
"""
|
||||
implementation = platform.python_implementation()
|
||||
|
||||
if implementation == 'CPython':
|
||||
implementation_version = platform.python_version()
|
||||
elif implementation == 'PyPy':
|
||||
implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
|
||||
sys.pypy_version_info.minor,
|
||||
sys.pypy_version_info.micro)
|
||||
if sys.pypy_version_info.releaselevel != 'final':
|
||||
implementation_version = ''.join([
|
||||
implementation_version, sys.pypy_version_info.releaselevel
|
||||
])
|
||||
elif implementation == 'Jython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
elif implementation == 'IronPython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
else:
|
||||
implementation_version = 'Unknown'
|
||||
|
||||
return {'name': implementation, 'version': implementation_version}
|
||||
|
||||
|
||||
def info():
    """Collect the environment details that go into a bug report.

    :return: dict of nested ``{'version': ...}`` style entries for the
        platform, the Python implementation, the system SSL library and the
        libraries Requests depends on.
    """
    try:
        platform_info = dict(
            system=platform.system(),
            release=platform.release(),
        )
    except IOError:
        platform_info = dict(system='Unknown', release='Unknown')

    implementation_info = _implementation()
    urllib3_info = dict(version=urllib3.__version__)
    chardet_info = dict(version=chardet.__version__)

    # OpenSSL is None when the pyopenssl contrib module could not be imported.
    if OpenSSL:
        pyopenssl_info = dict(
            version=OpenSSL.__version__,
            openssl_version='%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
        )
    else:
        pyopenssl_info = dict(version=None, openssl_version='')

    cryptography_info = dict(version=getattr(cryptography, '__version__', ''))
    idna_info = dict(version=getattr(idna, '__version__', ''))

    system_ssl = ssl.OPENSSL_VERSION_NUMBER
    if system_ssl is not None:
        system_ssl_info = dict(version='%x' % system_ssl)
    else:
        system_ssl_info = dict(version='')

    report = {}
    report['platform'] = platform_info
    report['implementation'] = implementation_info
    report['system_ssl'] = system_ssl_info
    report['using_pyopenssl'] = pyopenssl is not None
    report['pyOpenSSL'] = pyopenssl_info
    report['urllib3'] = urllib3_info
    report['chardet'] = chardet_info
    report['cryptography'] = cryptography_info
    report['idna'] = idna_info
    report['requests'] = dict(version=requests_version)
    return report
|
||||
|
||||
|
||||
def main():
    """Print the bug-report information as indented, key-sorted JSON."""
    report = info()
    print(json.dumps(report, sort_keys=True, indent=2))


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,34 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.hooks
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
This module provides the capabilities for the Requests hooks system.
|
||||
|
||||
Available hooks:
|
||||
|
||||
``response``:
|
||||
The response generated from a Request.
|
||||
"""
|
||||
HOOKS = ['response']


def default_hooks():
    """Return a fresh hook registry: one new empty list per name in HOOKS."""
    return dict((event, []) for event in HOOKS)
|
||||
|
||||
# TODO: response is the only one
|
||||
|
||||
|
||||
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under ``key`` over ``hook_data``.

    :param key: name of the hook event to dispatch.
    :param hooks: mapping of event name to a callable or an iterable of
        callables; may be None or empty.
    :param hook_data: value passed to each hook in turn.
    :param kwargs: extra keyword arguments forwarded to every hook.
    :return: ``hook_data``, replaced by each hook result that is not None.
    """
    registered = (hooks or {}).get(key)
    if not registered:
        return hook_data

    # A single callable is treated as a one-element list.
    if hasattr(registered, '__call__'):
        registered = [registered]

    for callback in registered:
        outcome = callback(hook_data, **kwargs)
        if outcome is not None:
            hook_data = outcome
    return hook_data
|
|
@ -0,0 +1,974 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.models
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
This module contains the primary objects that power Requests.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import sys
|
||||
|
||||
# Import encoding now, to avoid implicit import later.
|
||||
# Implicit import within threads may cause LookupError when standard library is in a ZIP,
|
||||
# such as in Embedded Python. See https://github.com/psf/requests/issues/3578.
|
||||
import encodings.idna
|
||||
|
||||
from urllib3.fields import RequestField
|
||||
from urllib3.filepost import encode_multipart_formdata
|
||||
from urllib3.util import parse_url
|
||||
from urllib3.exceptions import (
|
||||
DecodeError, ReadTimeoutError, ProtocolError, LocationParseError)
|
||||
|
||||
from io import UnsupportedOperation
|
||||
from .hooks import default_hooks
|
||||
from .structures import CaseInsensitiveDict
|
||||
|
||||
from .auth import HTTPBasicAuth
|
||||
from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar
|
||||
from .exceptions import (
|
||||
HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError,
|
||||
ContentDecodingError, ConnectionError, StreamConsumedError)
|
||||
from ._internal_utils import to_native_string, unicode_is_ascii
|
||||
from .utils import (
|
||||
guess_filename, get_auth_from_url, requote_uri,
|
||||
stream_decode_response_unicode, to_key_val_list, parse_header_links,
|
||||
iter_slices, guess_json_utf, super_len, check_header_validity)
|
||||
from .compat import (
|
||||
Callable, Mapping,
|
||||
cookielib, urlunparse, urlsplit, urlencode, str, bytes,
|
||||
is_py2, chardet, builtin_str, basestring)
|
||||
from .compat import json as complexjson
|
||||
from .status_codes import codes
|
||||
|
||||
#: The set of HTTP status codes that indicate an automatically
#: processable redirect.
REDIRECT_STATI = (
    codes.moved,               # 301
    codes.found,               # 302
    codes.other,               # 303
    codes.temporary_redirect,  # 307
    codes.permanent_redirect,  # 308
)

# NOTE(review): the three constants below are not used within this view;
# the descriptions are inferred from their names -- confirm against callers.
# Presumably the default cap on the number of redirects followed.
DEFAULT_REDIRECT_LIMIT = 30
# 10 KiB; presumably the chunk size used when reading response content.
CONTENT_CHUNK_SIZE = 10 * 1024
# Presumably the default chunk size for iterating over a response body.
ITER_CHUNK_SIZE = 512
|
||||
|
||||
|
||||
class RequestEncodingMixin(object):
    """Mixin with the URL-path, parameter and multipart encoding helpers."""

    @property
    def path_url(self):
        """Return the path (``/`` when empty) plus ``?query`` of ``self.url``."""
        split = urlsplit(self.url)

        pieces = [split.path or '/']
        if split.query:
            pieces.extend(('?', split.query))

        return ''.join(pieces)

    @staticmethod
    def _encode_params(data):
        """Form-encode ``data`` when it is a collection of parameters.

        Strings, bytes, file-like objects (anything with ``read``) and
        non-iterables are returned untouched. A dict or a list of 2-tuples is
        url-encoded; order is retained for a list of 2-tuples but arbitrary
        for a dict. Values that are None are dropped.
        """
        if isinstance(data, (str, bytes)) or hasattr(data, 'read'):
            return data
        if not hasattr(data, '__iter__'):
            return data

        pairs = []
        for key, values in to_key_val_list(data):
            # Treat a scalar (or a string) as a single-item list of values.
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for value in values:
                if value is None:
                    continue
                pairs.append(
                    (key.encode('utf-8') if isinstance(key, str) else key,
                     value.encode('utf-8') if isinstance(value, str) else value))
        return urlencode(pairs, doseq=True)

    @staticmethod
    def _encode_files(files, data):
        """Build the body for a multipart/form-data request.

        ``files`` and ``data`` may each be a dict or a list of tuples; order
        is retained for a list of tuples but arbitrary for a dict.
        A file value may be a bare file object/content or a tuple: 2-tuple
        (filename, fileobj), 3-tuple (filename, fileobj, contentype) or
        4-tuple (filename, fileobj, contentype, custom_headers).

        :raises ValueError: if ``files`` is empty or ``data`` is a string.
        :return: ``(body, content_type)``.
        """
        if not files:
            raise ValueError("Files must be provided.")
        if isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        new_fields = []
        form_items = to_key_val_list(data or {})
        file_items = to_key_val_list(files or {})

        for field, values in form_items:
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for value in values:
                if value is None:
                    continue
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(value, bytes):
                    value = str(value)

                new_fields.append(
                    (field.decode('utf-8') if isinstance(field, bytes) else field,
                     value.encode('utf-8') if isinstance(value, str) else value))

        for (key, spec) in file_items:
            # support for explicit filename
            content_type = None
            custom_headers = None
            if isinstance(spec, (tuple, list)):
                if len(spec) == 2:
                    filename, fileobj = spec
                elif len(spec) == 3:
                    filename, fileobj, content_type = spec
                else:
                    filename, fileobj, content_type, custom_headers = spec
            else:
                filename = guess_filename(spec) or key
                fileobj = spec

            # An explicit None file is skipped entirely.
            if fileobj is None:
                continue
            # Raw strings/bytes are used as-is; other readable objects are
            # read; anything else is passed through unchanged.
            if not isinstance(fileobj, (str, bytes, bytearray)) and hasattr(fileobj, 'read'):
                payload = fileobj.read()
            else:
                payload = fileobj

            part = RequestField(name=key, data=payload, filename=filename, headers=custom_headers)
            part.make_multipart(content_type=content_type)
            new_fields.append(part)

        body, content_type = encode_multipart_formdata(new_fields)

        return body, content_type
|
||||
|
||||
|
||||
class RequestHooksMixin(object):
    """Mixin managing the per-event hook lists stored in ``self.hooks``."""

    def register_hook(self, event, hook):
        """Attach ``hook`` to ``event``.

        A callable is appended; an iterable contributes each of its callable
        members (non-callables are ignored). Anything else is ignored.

        :raises ValueError: if ``event`` is not a key of ``self.hooks``.
        """
        if event not in self.hooks:
            raise ValueError('Unsupported event specified, with event name "%s"' % (event))

        if isinstance(hook, Callable):
            self.hooks[event].append(hook)
            return
        if hasattr(hook, '__iter__'):
            self.hooks[event].extend(
                candidate for candidate in hook if isinstance(candidate, Callable))

    def deregister_hook(self, event, hook):
        """Detach a previously registered hook.

        :return: True if the hook was registered and has been removed, False
            if it was not in the list for ``event``.
        """
        try:
            self.hooks[event].remove(hook)
        except ValueError:
            return False
        return True
|
||||
|
||||
|
||||
class Request(RequestHooksMixin):
    """A user-created :class:`Request <Request>` object.

    It collects the pieces of a request and is turned into a
    :class:`PreparedRequest <PreparedRequest>`, which is sent to the server.

    :param method: HTTP method to use.
    :param url: URL to send.
    :param headers: dictionary of headers to send.
    :param files: dictionary of {filename: fileobject} files to multipart upload.
    :param data: the body to attach to the request. If a dictionary or
        list of tuples ``[(key, value)]`` is provided, form-encoding will
        take place.
    :param json: json for the body to attach to the request (if files or data is not specified).
    :param params: URL parameters to append to the URL. If a dictionary or
        list of tuples ``[(key, value)]`` is provided, form-encoding will
        take place.
    :param auth: Auth handler or (user, pass) tuple.
    :param cookies: dictionary or CookieJar of cookies to attach to this request.
    :param hooks: dictionary of callback hooks, for internal usage.

    Usage::

      >>> import requests
      >>> req = requests.Request('GET', 'https://httpbin.org/get')
      >>> req.prepare()
      <PreparedRequest [GET]>
    """

    def __init__(self,
            method=None, url=None, headers=None, files=None, data=None,
            params=None, auth=None, cookies=None, hooks=None, json=None):

        # Replace omitted collection arguments with fresh empty containers.
        if data is None:
            data = []
        if files is None:
            files = []
        if headers is None:
            headers = {}
        if params is None:
            params = {}
        if hooks is None:
            hooks = {}

        # Start from the default registry and register the supplied hooks
        # through register_hook.
        self.hooks = default_hooks()
        for event, hook in list(hooks.items()):
            self.register_hook(event=event, hook=hook)

        self.method = method
        self.url = url
        self.headers = headers
        self.files = files
        self.data = data
        self.json = json
        self.params = params
        self.auth = auth
        self.cookies = cookies

    def __repr__(self):
        return '<Request [%s]>' % (self.method)

    def prepare(self):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for transmission and returns it."""
        prepared = PreparedRequest()
        settings = dict(
            method=self.method,
            url=self.url,
            headers=self.headers,
            files=self.files,
            data=self.data,
            json=self.json,
            params=self.params,
            auth=self.auth,
            cookies=self.cookies,
            hooks=self.hooks,
        )
        prepared.prepare(**settings)
        return prepared
|
||||
|
||||
|
||||
class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
"""The fully mutable :class:`PreparedRequest <PreparedRequest>` object,
|
||||
containing the exact bytes that will be sent to the server.
|
||||
|
||||
Generated from either a :class:`Request <Request>` object or manually.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import requests
|
||||
>>> req = requests.Request('GET', 'https://httpbin.org/get')
|
||||
>>> r = req.prepare()
|
||||
>>> r
|
||||
<PreparedRequest [GET]>
|
||||
|
||||
>>> s = requests.Session()
|
||||
>>> s.send(r)
|
||||
<Response [200]>
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Create an empty request; every field is filled in by ``prepare``."""
    #: HTTP verb to send to the server.
    self.method = None
    #: HTTP URL to send the request to.
    self.url = None
    #: dictionary of HTTP headers.
    self.headers = None
    # Holds the `CookieJar` used to build the Cookie header once
    # prepare_cookies has been called.
    self._cookies = None
    #: request body to send to the server.
    self.body = None
    #: dictionary of callback hooks, for internal usage.
    self.hooks = default_hooks()
    #: integer denoting starting position of a readable file-like body.
    self._body_position = None
|
||||
|
||||
def prepare(self,
|
||||
method=None, url=None, headers=None, files=None, data=None,
|
||||
params=None, auth=None, cookies=None, hooks=None, json=None):
|
||||
"""Prepares the entire request with the given parameters."""
|
||||
|
||||
self.prepare_method(method)
|
||||
self.prepare_url(url, params)
|
||||
self.prepare_headers(headers)
|
||||
self.prepare_cookies(cookies)
|
||||
self.prepare_body(data, files, json)
|
||||
self.prepare_auth(auth, url)
|
||||
|
||||
# Note that prepare_auth must be last to enable authentication schemes
|
||||
# such as OAuth to work on a fully prepared request.
|
||||
|
||||
# This MUST go after prepare_auth. Authenticators could add a hook
|
||||
self.prepare_hooks(hooks)
|
||||
|
||||
def __repr__(self):
|
||||
return '<PreparedRequest [%s]>' % (self.method)
|
||||
|
||||
def copy(self):
|
||||
p = PreparedRequest()
|
||||
p.method = self.method
|
||||
p.url = self.url
|
||||
p.headers = self.headers.copy() if self.headers is not None else None
|
||||
p._cookies = _copy_cookie_jar(self._cookies)
|
||||
p.body = self.body
|
||||
p.hooks = self.hooks
|
||||
p._body_position = self._body_position
|
||||
return p
|
||||
|
||||
def prepare_method(self, method):
|
||||
"""Prepares the given HTTP method."""
|
||||
self.method = method
|
||||
if self.method is not None:
|
||||
self.method = to_native_string(self.method.upper())
|
||||
|
||||
@staticmethod
|
||||
def _get_idna_encoded_host(host):
|
||||
import idna
|
||||
|
||||
try:
|
||||
host = idna.encode(host, uts46=True).decode('utf-8')
|
||||
except idna.IDNAError:
|
||||
raise UnicodeError
|
||||
return host
|
||||
|
||||
def prepare_url(self, url, params):
|
||||
"""Prepares the given HTTP URL."""
|
||||
#: Accept objects that have string representations.
|
||||
#: We're unable to blindly call unicode/str functions
|
||||
#: as this will include the bytestring indicator (b'')
|
||||
#: on python 3.x.
|
||||
#: https://github.com/psf/requests/pull/2238
|
||||
if isinstance(url, bytes):
|
||||
url = url.decode('utf8')
|
||||
else:
|
||||
url = unicode(url) if is_py2 else str(url)
|
||||
|
||||
# Remove leading whitespaces from url
|
||||
url = url.lstrip()
|
||||
|
||||
# Don't do any URL preparation for non-HTTP schemes like `mailto`,
|
||||
# `data` etc to work around exceptions from `url_parse`, which
|
||||
# handles RFC 3986 only.
|
||||
if ':' in url and not url.lower().startswith('http'):
|
||||
self.url = url
|
||||
return
|
||||
|
||||
# Support for unicode domain names and paths.
|
||||
try:
|
||||
scheme, auth, host, port, path, query, fragment = parse_url(url)
|
||||
except LocationParseError as e:
|
||||
raise InvalidURL(*e.args)
|
||||
|
||||
if not scheme:
|
||||
error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?")
|
||||
error = error.format(to_native_string(url, 'utf8'))
|
||||
|
||||
raise MissingSchema(error)
|
||||
|
||||
if not host:
|
||||
raise InvalidURL("Invalid URL %r: No host supplied" % url)
|
||||
|
||||
# In general, we want to try IDNA encoding the hostname if the string contains
|
||||
# non-ASCII characters. This allows users to automatically get the correct IDNA
|
||||
# behaviour. For strings containing only ASCII characters, we need to also verify
|
||||
# it doesn't start with a wildcard (*), before allowing the unencoded hostname.
|
||||
if not unicode_is_ascii(host):
|
||||
try:
|
||||
host = self._get_idna_encoded_host(host)
|
||||
except UnicodeError:
|
||||
raise InvalidURL('URL has an invalid label.')
|
||||
elif host.startswith(u'*'):
|
||||
raise InvalidURL('URL has an invalid label.')
|
||||
|
||||
# Carefully reconstruct the network location
|
||||
netloc = auth or ''
|
||||
if netloc:
|
||||
netloc += '@'
|
||||
netloc += host
|
||||
if port:
|
||||
netloc += ':' + str(port)
|
||||
|
||||
# Bare domains aren't valid URLs.
|
||||
if not path:
|
||||
path = '/'
|
||||
|
||||
if is_py2:
|
||||
if isinstance(scheme, str):
|
||||
scheme = scheme.encode('utf-8')
|
||||
if isinstance(netloc, str):
|
||||
netloc = netloc.encode('utf-8')
|
||||
if isinstance(path, str):
|
||||
path = path.encode('utf-8')
|
||||
if isinstance(query, str):
|
||||
query = query.encode('utf-8')
|
||||
if isinstance(fragment, str):
|
||||
fragment = fragment.encode('utf-8')
|
||||
|
||||
if isinstance(params, (str, bytes)):
|
||||
params = to_native_string(params)
|
||||
|
||||
enc_params = self._encode_params(params)
|
||||
if enc_params:
|
||||
if query:
|
||||
query = '%s&%s' % (query, enc_params)
|
||||
else:
|
||||
query = enc_params
|
||||
|
||||
url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
|
||||
self.url = url
|
||||
|
||||
def prepare_headers(self, headers):
|
||||
"""Prepares the given HTTP headers."""
|
||||
|
||||
self.headers = CaseInsensitiveDict()
|
||||
if headers:
|
||||
for header in headers.items():
|
||||
# Raise exception on invalid header value.
|
||||
check_header_validity(header)
|
||||
name, value = header
|
||||
self.headers[to_native_string(name)] = value
|
||||
|
||||
def prepare_body(self, data, files, json=None):
|
||||
"""Prepares the given HTTP body data."""
|
||||
|
||||
# Check if file, fo, generator, iterator.
|
||||
# If not, run through normal process.
|
||||
|
||||
# Nottin' on you.
|
||||
body = None
|
||||
content_type = None
|
||||
|
||||
if not data and json is not None:
|
||||
# urllib3 requires a bytes-like body. Python 2's json.dumps
|
||||
# provides this natively, but Python 3 gives a Unicode string.
|
||||
content_type = 'application/json'
|
||||
body = complexjson.dumps(json)
|
||||
if not isinstance(body, bytes):
|
||||
body = body.encode('utf-8')
|
||||
|
||||
is_stream = all([
|
||||
hasattr(data, '__iter__'),
|
||||
not isinstance(data, (basestring, list, tuple, Mapping))
|
||||
])
|
||||
|
||||
try:
|
||||
length = super_len(data)
|
||||
except (TypeError, AttributeError, UnsupportedOperation):
|
||||
length = None
|
||||
|
||||
if is_stream:
|
||||
body = data
|
||||
|
||||
if getattr(body, 'tell', None) is not None:
|
||||
# Record the current file position before reading.
|
||||
# This will allow us to rewind a file in the event
|
||||
# of a redirect.
|
||||
try:
|
||||
self._body_position = body.tell()
|
||||
except (IOError, OSError):
|
||||
# This differentiates from None, allowing us to catch
|
||||
# a failed `tell()` later when trying to rewind the body
|
||||
self._body_position = object()
|
||||
|
||||
if files:
|
||||
raise NotImplementedError('Streamed bodies and files are mutually exclusive.')
|
||||
|
||||
if length:
|
||||
self.headers['Content-Length'] = builtin_str(length)
|
||||
else:
|
||||
self.headers['Transfer-Encoding'] = 'chunked'
|
||||
else:
|
||||
# Multi-part file uploads.
|
||||
if files:
|
||||
(body, content_type) = self._encode_files(files, data)
|
||||
else:
|
||||
if data:
|
||||
body = self._encode_params(data)
|
||||
if isinstance(data, basestring) or hasattr(data, 'read'):
|
||||
content_type = None
|
||||
else:
|
||||
content_type = 'application/x-www-form-urlencoded'
|
||||
|
||||
self.prepare_content_length(body)
|
||||
|
||||
# Add content-type if it wasn't explicitly provided.
|
||||
if content_type and ('content-type' not in self.headers):
|
||||
self.headers['Content-Type'] = content_type
|
||||
|
||||
self.body = body
|
||||
|
||||
def prepare_content_length(self, body):
|
||||
"""Prepare Content-Length header based on request method and body"""
|
||||
if body is not None:
|
||||
length = super_len(body)
|
||||
if length:
|
||||
# If length exists, set it. Otherwise, we fallback
|
||||
# to Transfer-Encoding: chunked.
|
||||
self.headers['Content-Length'] = builtin_str(length)
|
||||
elif self.method not in ('GET', 'HEAD') and self.headers.get('Content-Length') is None:
|
||||
# Set Content-Length to 0 for methods that can have a body
|
||||
# but don't provide one. (i.e. not GET or HEAD)
|
||||
self.headers['Content-Length'] = '0'
|
||||
|
||||
def prepare_auth(self, auth, url=''):
|
||||
"""Prepares the given HTTP auth data."""
|
||||
|
||||
# If no Auth is explicitly provided, extract it from the URL first.
|
||||
if auth is None:
|
||||
url_auth = get_auth_from_url(self.url)
|
||||
auth = url_auth if any(url_auth) else None
|
||||
|
||||
if auth:
|
||||
if isinstance(auth, tuple) and len(auth) == 2:
|
||||
# special-case basic HTTP auth
|
||||
auth = HTTPBasicAuth(*auth)
|
||||
|
||||
# Allow auth to make its changes.
|
||||
r = auth(self)
|
||||
|
||||
# Update self to reflect the auth changes.
|
||||
self.__dict__.update(r.__dict__)
|
||||
|
||||
# Recompute Content-Length
|
||||
self.prepare_content_length(self.body)
|
||||
|
||||
def prepare_cookies(self, cookies):
|
||||
"""Prepares the given HTTP cookie data.
|
||||
|
||||
This function eventually generates a ``Cookie`` header from the
|
||||
given cookies using cookielib. Due to cookielib's design, the header
|
||||
will not be regenerated if it already exists, meaning this function
|
||||
can only be called once for the life of the
|
||||
:class:`PreparedRequest <PreparedRequest>` object. Any subsequent calls
|
||||
to ``prepare_cookies`` will have no actual effect, unless the "Cookie"
|
||||
header is removed beforehand.
|
||||
"""
|
||||
if isinstance(cookies, cookielib.CookieJar):
|
||||
self._cookies = cookies
|
||||
else:
|
||||
self._cookies = cookiejar_from_dict(cookies)
|
||||
|
||||
cookie_header = get_cookie_header(self._cookies, self)
|
||||
if cookie_header is not None:
|
||||
self.headers['Cookie'] = cookie_header
|
||||
|
||||
def prepare_hooks(self, hooks):
|
||||
"""Prepares the given hooks."""
|
||||
# hooks can be passed as None to the prepare method and to this
|
||||
# method. To prevent iterating over None, simply use an empty list
|
||||
# if hooks is False-y
|
||||
hooks = hooks or []
|
||||
for event in hooks:
|
||||
self.register_hook(event, hooks[event])
|
||||
|
||||
|
||||
class Response(object):
    """The :class:`Response <Response>` object, which contains a
    server's response to an HTTP request.
    """

    # Attributes preserved when a response is pickled (see __getstate__).
    __attrs__ = [
        '_content', 'status_code', 'headers', 'url', 'history',
        'encoding', 'reason', 'cookies', 'elapsed', 'request'
    ]

    def __init__(self):
        """Create an empty response; the fields are populated by the caller."""
        # ``False`` means "not read yet"; it is replaced by bytes (or None)
        # the first time ``content`` is accessed.
        self._content = False
        self._content_consumed = False
        self._next = None

        #: Integer Code of responded HTTP Status, e.g. 404 or 200.
        self.status_code = None

        #: Case-insensitive Dictionary of Response Headers.
        #: For example, ``headers['content-encoding']`` will return the
        #: value of a ``'Content-Encoding'`` response header.
        self.headers = CaseInsensitiveDict()

        #: File-like object representation of response (for advanced usage).
        #: Use of ``raw`` requires that ``stream=True`` be set on the request.
        #: This requirement does not apply for use internally to Requests.
        self.raw = None

        #: Final URL location of Response.
        self.url = None

        #: Encoding to decode with when accessing r.text.
        self.encoding = None

        #: A list of :class:`Response <Response>` objects from
        #: the history of the Request. Any redirect responses will end
        #: up here. The list is sorted from the oldest to the most recent request.
        self.history = []

        #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK".
        self.reason = None

        #: A CookieJar of Cookies the server sent back.
        self.cookies = cookiejar_from_dict({})

        #: The amount of time elapsed between sending the request
        #: and the arrival of the response (as a timedelta).
        #: This property specifically measures the time taken between sending
        #: the first byte of the request and finishing parsing the headers. It
        #: is therefore unaffected by consuming the response content or the
        #: value of the ``stream`` keyword argument.
        self.elapsed = datetime.timedelta(0)

        #: The :class:`PreparedRequest <PreparedRequest>` object to which this
        #: is a response.
        self.request = None

        #: If there was an error while processing the content, it is saved
        #: here so that the same error is raised again on later access.
        self._error = None

    def __enter__(self):
        """Enter a ``with`` block; the response itself is the context value."""
        return self

    def __exit__(self, *args):
        """Leave a ``with`` block by closing the response."""
        self.close()

    def __getstate__(self):
        """Return the picklable state: the attributes listed in ``__attrs__``."""
        # Consume everything; accessing the content attribute makes
        # sure the content has been fully read.
        if not self._content_consumed:
            self.content

        return {attr: getattr(self, attr, None) for attr in self.__attrs__}

    def __setstate__(self, state):
        """Restore pickled state and mark the content as already consumed."""
        for name, value in state.items():
            setattr(self, name, value)

        # pickled objects do not have .raw
        setattr(self, '_content_consumed', True)
        setattr(self, 'raw', None)

    def __repr__(self):
        """Return a short debug representation that shows the status code."""
        return '<Response [%s]>' % (self.status_code)

    def __bool__(self):
        """Returns True if :attr:`status_code` is less than 400.

        This attribute checks if the status code of the response is between
        400 and 600 to see if there was a client error or a server error. If
        the status code is between 200 and 400, this will return True. This
        is **not** a check to see if the response code is ``200 OK``.
        """
        return self.ok

    def __nonzero__(self):
        """Returns True if :attr:`status_code` is less than 400.

        Python 2 spelling of :meth:`__bool__`.

        This attribute checks if the status code of the response is between
        400 and 600 to see if there was a client error or a server error. If
        the status code is between 200 and 400, this will return True. This
        is **not** a check to see if the response code is ``200 OK``.
        """
        return self.ok

    def __iter__(self):
        """Allows you to use a response as an iterator."""
        return self.iter_content(128)

    @property
    def ok(self):
        """Returns True if :attr:`status_code` is less than 400, False if not.

        This attribute checks if the status code of the response is between
        400 and 600 to see if there was a client error or a server error. If
        the status code is between 200 and 400, this will return True. This
        is **not** a check to see if the response code is ``200 OK``.
        """
        try:
            self.raise_for_status()
        except HTTPError:
            return False
        return True

    @property
    def is_redirect(self):
        """True if this Response is a well-formed HTTP redirect that could have
        been processed automatically (by :meth:`Session.resolve_redirects`).
        """
        return ('location' in self.headers and self.status_code in REDIRECT_STATI)

    @property
    def is_permanent_redirect(self):
        """True if this Response is one of the permanent versions of redirect."""
        return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect))

    @property
    def next(self):
        """Returns a PreparedRequest for the next request in a redirect chain, if there is one."""
        return self._next

    @property
    def apparent_encoding(self):
        """The apparent encoding, provided by the chardet library."""
        return chardet.detect(self.content)['encoding']

    def iter_content(self, chunk_size=1, decode_unicode=False):
        """Iterates over the response data.  When stream=True is set on the
        request, this avoids reading the content at once into memory for
        large responses.  The chunk size is the number of bytes it should
        read into memory.  This is not necessarily the length of each item
        returned as decoding can take place.

        chunk_size must be of type int or None. A value of None will
        function differently depending on the value of `stream`.
        stream=True will read data as it arrives in whatever size the
        chunks are received. If stream=False, data is returned as
        a single chunk.

        If decode_unicode is True, content will be decoded using the best
        available encoding based on the response.

        :raises StreamConsumedError: if the stream was consumed without the
            content having been stored.
        :raises TypeError: if ``chunk_size`` is neither ``None`` nor an int.
        """

        def generate():
            # Special case for urllib3.
            if hasattr(self.raw, 'stream'):
                try:
                    for chunk in self.raw.stream(chunk_size, decode_content=True):
                        yield chunk

                except ProtocolError as e:
                    self._error = ChunkedEncodingError(e)

                except DecodeError as e:
                    self._error = ContentDecodingError(e)

                except ReadTimeoutError as e:
                    self._error = ConnectionError(e)

                finally:
                    # if we had an error - throw the saved error
                    if self._error:
                        raise self._error

            else:
                # Standard file-like object.
                while True:
                    chunk = self.raw.read(chunk_size)
                    if not chunk:
                        break
                    yield chunk

            self._content_consumed = True

        if self._content_consumed and isinstance(self._content, bool):
            raise StreamConsumedError()
        elif chunk_size is not None and not isinstance(chunk_size, int):
            raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))
        # simulate reading small chunks of the content
        reused_chunks = iter_slices(self._content, chunk_size)

        stream_chunks = generate()

        chunks = reused_chunks if self._content_consumed else stream_chunks

        if decode_unicode:
            chunks = stream_decode_response_unicode(chunks, self)

        return chunks

    def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None):
        """Iterates over the response data, one line at a time.  When
        stream=True is set on the request, this avoids reading the
        content at once into memory for large responses.

        .. note:: This method is not reentrant safe.
        """

        # Trailing partial line carried over from the previous chunk.
        pending = None

        for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):

            if pending is not None:
                chunk = pending + chunk

            if delimiter:
                lines = chunk.split(delimiter)
            else:
                lines = chunk.splitlines()

            # If the chunk does not end on a line boundary, hold the last
            # piece back until the next chunk arrives.
            if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
                pending = lines.pop()
            else:
                pending = None

            for line in lines:
                yield line

        if pending is not None:
            yield pending

    @property
    def content(self):
        """Content of the response, in bytes."""

        if self._content is False:
            # Read the contents.
            if self._content_consumed:
                raise RuntimeError(
                    'The content for this response was already consumed')

            if self.status_code == 0 or self.raw is None:
                self._content = None
            else:
                self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''

        # if we had an error - throw the saved error
        if self._error is not None:
            raise self._error

        self._content_consumed = True
        # don't need to release the connection; that's been handled by urllib3
        # since we exhausted the data.
        return self._content

    @property
    def text(self):
        """Content of the response, in unicode.

        If Response.encoding is None, encoding will be guessed using
        ``chardet``.

        The encoding of the response content is determined based solely on HTTP
        headers, following RFC 2616 to the letter. If you can take advantage of
        non-HTTP knowledge to make a better guess at the encoding, you should
        set ``r.encoding`` appropriately before accessing this property.
        """

        # Try charset from content-type
        content = None
        encoding = self.encoding

        if not self.content:
            return str('')

        # Fallback to auto-detected encoding.
        if self.encoding is None:
            encoding = self.apparent_encoding
        # Forcefully remove BOM from UTF-8
        elif self.encoding.lower() == 'utf-8':
            encoding = 'utf-8-sig'

        # Decode unicode from given encoding.
        try:
            content = str(self.content, encoding, errors='replace')
        except (LookupError, TypeError):
            # A LookupError is raised if the encoding was not found which could
            # indicate a misspelling or similar mistake.
            #
            # A TypeError can be raised if encoding is None
            #
            # So we try blindly encoding.
            content = str(self.content, errors='replace')

        return content

    def json(self, **kwargs):
        r"""Returns the json-encoded content of a response, if any.

        :param \*\*kwargs: Optional arguments that ``json.loads`` takes.
        :raises ValueError: If the response body does not contain valid json.
        """

        if not self.encoding and self.content and len(self.content) > 3:
            # No encoding set. JSON RFC 4627 section 3 states we should expect
            # UTF-8, -16 or -32. Detect which one to use; If the detection or
            # decoding fails, fall back to `self.text` (using chardet to make
            # a best guess).
            encoding = guess_json_utf(self.content)
            if encoding is not None:
                try:
                    return complexjson.loads(
                        self.content.decode(encoding), **kwargs
                    )
                except UnicodeDecodeError:
                    # Wrong UTF codec detected; usually because it's not UTF-8
                    # but some other 8-bit codec.  This is an RFC violation,
                    # and the server didn't bother to tell us what codec *was*
                    # used.
                    pass
        return complexjson.loads(self.text, **kwargs)

    @property
    def links(self):
        """Returns the parsed header links of the response, if any.

        The result is a dict keyed by each link's ``rel`` value, or by its
        ``url`` when ``rel`` is missing or empty.
        """

        header = self.headers.get('link')

        resolved_links = {}

        if header:
            for link in parse_header_links(header):
                key = link.get('rel') or link.get('url')
                resolved_links[key] = link

        return resolved_links

    def raise_for_status(self):
        """Raises stored :class:`HTTPError`, if one occurred.

        :raises HTTPError: when ``status_code`` is in the 4xx or 5xx range.
        """

        http_error_msg = ''
        if isinstance(self.reason, bytes):
            # We attempt to decode utf-8 first because some servers
            # choose to localize their reason strings. If the string
            # isn't utf-8, we fall back to iso-8859-1 for all other
            # encodings. (See PR #3538)
            try:
                reason = self.reason.decode('utf-8')
            except UnicodeDecodeError:
                reason = self.reason.decode('iso-8859-1')
        else:
            reason = self.reason

        if 400 <= self.status_code < 500:
            http_error_msg = u'%s Client Error: %s for url: %s' % (self.status_code, reason, self.url)

        elif 500 <= self.status_code < 600:
            http_error_msg = u'%s Server Error: %s for url: %s' % (self.status_code, reason, self.url)

        if http_error_msg:
            raise HTTPError(http_error_msg, response=self)

    def close(self):
        """Releases the connection back to the pool. Once this method has been
        called the underlying ``raw`` object must not be accessed again.

        Does nothing when no ``raw`` object is attached.

        *Note: Should not normally need to be called explicitly.*
        """
        raw = self.raw
        if raw is None:
            # Nothing to close or release, e.g. a response that was built by
            # hand or restored from a pickle.
            return

        if not self._content_consumed:
            raw.close()

        release_conn = getattr(raw, 'release_conn', None)
        if release_conn is not None:
            release_conn()
|
|
@ -0,0 +1,14 @@
|
|||
import sys
|
||||
|
||||
# This code exists for backwards compatibility reasons.
# I don't like it either. Just look the other way. :)

# Import each dependency, bind it by name via ``locals()``, and register it
# (plus every already-loaded submodule) in ``sys.modules`` under the
# ``requests.packages.`` prefix.
for package in ('urllib3', 'idna', 'chardet'):
    locals()[package] = __import__(package)
    # This traversal is apparently necessary such that the identities are
    # preserved (requests.packages.urllib3.* is urllib3.*)
    # ``list(...)`` takes a snapshot because the loop body adds new keys to
    # ``sys.modules`` while iterating.
    for mod in list(sys.modules):
        if mod == package or mod.startswith(package + '.'):
            sys.modules['requests.packages.' + mod] = sys.modules[mod]

# Kinda cool, though, right?
|
|
@ -0,0 +1,767 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.sessions
~~~~~~~~~~~~~~~~~
|
||||
|
||||
This module provides a Session object to manage and persist settings across
|
||||
requests (cookies, auth, proxies).
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import timedelta
|
||||
from collections import OrderedDict
|
||||
|
||||
from .auth import _basic_auth_str
|
||||
from .compat import cookielib, is_py3, urljoin, urlparse, Mapping
|
||||
from .cookies import (
|
||||
cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
|
||||
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
|
||||
from .hooks import default_hooks, dispatch_hook
|
||||
from ._internal_utils import to_native_string
|
||||
from .utils import to_key_val_list, default_headers, DEFAULT_PORTS
|
||||
from .exceptions import (
|
||||
TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError)
|
||||
|
||||
from .structures import CaseInsensitiveDict
|
||||
from .adapters import HTTPAdapter
|
||||
|
||||
from .utils import (
|
||||
requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies,
|
||||
get_auth_from_url, rewind_body
|
||||
)
|
||||
|
||||
from .status_codes import codes
|
||||
|
||||
# formerly defined here, reexposed here for backward compatibility
|
||||
from .models import REDIRECT_STATI
|
||||
|
||||
# Pick the timer used for measuring elapsed time, based on which clock is
# more accurate on the current platform.
if sys.platform != 'win32':
    preferred_clock = time.time
elif hasattr(time, 'perf_counter'):  # Python 3.3+
    preferred_clock = time.perf_counter
else:  # Older interpreters without perf_counter.
    preferred_clock = time.clock
|
||||
|
||||
|
||||
def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
    """Pick the effective value of one setting for a request.

    The request-level value takes precedence over the session-level one.
    When both values are mappings they are combined into a new `dict_class`
    instance (session entries first, request entries overriding them) and any
    key whose merged value is ``None`` is dropped.
    """
    # If either side is unset, the other one wins outright.
    if session_setting is None:
        return request_setting
    if request_setting is None:
        return session_setting

    # Non-mapping settings (e.g. verify) are never merged.
    if not isinstance(session_setting, Mapping) or not isinstance(request_setting, Mapping):
        return request_setting

    merged = dict_class(to_key_val_list(session_setting))
    merged.update(to_key_val_list(request_setting))

    # Collect the keys first so the mapping is not altered while iterating.
    for unset_key in [name for (name, value) in merged.items() if value is None]:
        del merged[unset_key]

    return merged
|
||||
|
||||
|
||||
def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
    """Properly merges both requests and session hooks.

    A hooks mapping that is ``None`` or whose ``'response'`` entry is an
    empty list is treated as absent and the other side is returned as-is.
    This is necessary because when request_hooks == {'response': []}, the
    merge breaks Session hooks entirely.
    """
    def _is_absent(hooks):
        return hooks is None or hooks.get('response') == []

    if _is_absent(session_hooks):
        return request_hooks
    if _is_absent(request_hooks):
        return session_hooks

    return merge_setting(request_hooks, session_hooks, dict_class)
|
||||
|
||||
|
||||
class SessionRedirectMixin(object):
|
||||
|
||||
def get_redirect_target(self, resp):
    """Receives a Response. Returns a redirect URI or ``None``"""
    # Due to the nature of how requests processes redirects this method will
    # be called at least once upon the original response and at least twice
    # on each subsequent redirect response (if any).
    # If a custom mixin is used to handle this logic, it may be advantageous
    # to cache the redirect location onto the response object as a private
    # attribute.
    if not resp.is_redirect:
        return None

    target = resp.headers['location']
    # Currently the underlying http module on py3 decode headers
    # in latin1, but empirical evidence suggests that latin1 is very
    # rarely used with non-ASCII characters in HTTP headers.
    # It is more likely to get UTF8 header rather than latin1.
    # This causes incorrect handling of UTF8 encoded location headers.
    # To solve this, we re-encode the location in latin1.
    if is_py3:
        target = target.encode('latin1')
    return to_native_string(target, 'utf8')
|
||||
|
||||
def should_strip_auth(self, old_url, new_url):
    """Decide whether Authorization header should be removed when redirecting"""
    previous, target = urlparse(old_url), urlparse(new_url)

    # A different host always loses the credentials.
    if previous.hostname != target.hostname:
        return True

    # Special case: allow http -> https redirect when using the standard
    # ports. This isn't specified by RFC 7235, but is kept to avoid
    # breaking backwards compatibility with older versions of requests
    # that allowed any redirects on the same host.
    upgrade_on_standard_ports = (
        previous.scheme == 'http' and previous.port in (80, None)
        and target.scheme == 'https' and target.port in (443, None)
    )
    if upgrade_on_standard_ports:
        return False

    # Handle default port usage corresponding to scheme.
    port_differs = previous.port != target.port
    scheme_differs = previous.scheme != target.scheme
    implicit_ports = (DEFAULT_PORTS.get(previous.scheme, None), None)
    if not scheme_differs:
        if previous.port in implicit_ports and target.port in implicit_ports:
            return False

    # Standard case: root URI must match
    return port_differs or scheme_differs
|
||||
|
||||
def resolve_redirects(self, resp, req, stream=False, timeout=None,
                      verify=True, cert=None, proxies=None,
                      yield_requests=False, **adapter_kwargs):
    """Follow the redirect chain that starts at ``resp``.

    This is a generator. For every redirect hop it builds the follow-up
    request from a copy of ``req`` and then either yields that request
    (``yield_requests=True``) or sends it and yields the new response.

    :param resp: response that may carry a redirect target.
    :param req: the prepared request that produced ``resp``.
    :param yield_requests: yield the prepared follow-up requests instead of
        sending them.
    :raises TooManyRedirects: once ``self.max_redirects`` hops were recorded.
    """
    seen_responses = []  # every response visited so far, in order

    url = self.get_redirect_target(resp)
    previous_fragment = urlparse(req.url).fragment
    while url:
        next_request = req.copy()

        # Record the hop; resp.history leaves out the very first response.
        seen_responses.append(resp)
        resp.history = seen_responses[1:]

        try:
            resp.content  # drain the body so the socket can be released
        except (ChunkedEncodingError, ContentDecodingError, RuntimeError):
            resp.raw.read(decode_content=False)

        if len(resp.history) >= self.max_redirects:
            raise TooManyRedirects('Exceeded {} redirects.'.format(self.max_redirects), response=resp)

        # Hand the connection back to the pool.
        resp.close()

        # Scheme-relative target (RFC 1808 section 4): reuse the scheme of
        # the response we are redirecting from.
        if url.startswith('//'):
            origin = urlparse(resp.url)
            url = ':'.join([to_native_string(origin.scheme), url])

        # Normalise the URL and carry the previous fragment over when the
        # new location has none (RFC 7231 section 7.1.2).
        parsed = urlparse(url)
        if parsed.fragment == '' and previous_fragment:
            parsed = parsed._replace(fragment=previous_fragment)
        elif parsed.fragment:
            previous_fragment = parsed.fragment
        url = parsed.geturl()

        # Relative 'location' values are allowed by RFC 7231, e.g.
        # '/path/to/resource'; the URL is percent-encoded per RFC 3986.
        if parsed.netloc:
            url = requote_uri(url)
        else:
            url = urljoin(resp.url, requote_uri(url))

        next_request.url = to_native_string(url)

        self.rebuild_method(next_request, resp)

        # https://github.com/psf/requests/issues/1084
        if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
            # https://github.com/psf/requests/issues/3490
            for stale in ('Content-Length', 'Content-Type', 'Transfer-Encoding'):
                next_request.headers.pop(stale, None)
            next_request.body = None

        headers = next_request.headers
        headers.pop('Cookie', None)

        # Feed cookies set by this response into the new request's jar. The
        # copy has already been mutated, so the untouched ``req`` is used.
        extract_cookies_to_jar(next_request._cookies, req, resp.raw)
        merge_cookies(next_request._cookies, self.cookies)
        next_request.prepare_cookies(next_request._cookies)

        # Proxy and auth settings may differ for the new URL.
        proxies = self.rebuild_proxies(next_request, proxies)
        self.rebuild_auth(next_request, resp)

        # A failed tell() leaves `_body_position` set to `object()`; being
        # non-None it keeps `rewindable` True so an UnrewindableBodyError is
        # raised rather than the connection hanging.
        rewindable = (
            next_request._body_position is not None and
            ('Content-Length' in headers or 'Transfer-Encoding' in headers)
        )

        # Try to rewind an already consumed file-like body.
        if rewindable:
            rewind_body(next_request)

        # The follow-up request becomes the reference for the next hop.
        req = next_request

        if yield_requests:
            yield req
        else:
            resp = self.send(
                req,
                stream=stream,
                timeout=timeout,
                verify=verify,
                cert=cert,
                proxies=proxies,
                allow_redirects=False,
                **adapter_kwargs
            )

            extract_cookies_to_jar(self.cookies, next_request, resp.raw)

            # Pick up the next redirect target, if there is one.
            url = self.get_redirect_target(resp)
            yield resp
|
||||
|
||||
def rebuild_auth(self, prepared_request, response):
    """Re-evaluate authentication for a redirected request.

    Removes the ``Authorization`` header when ``should_strip_auth`` says the
    redirect must not receive it, then, if ``self.trust_env`` is set, applies
    any credentials ``get_netrc_auth`` returns for the new URL.

    :param prepared_request: the follow-up request, modified in place.
    :param response: the redirect response; ``response.request.url`` is the
        URL the credentials were originally sent to.
    """
    request_headers = prepared_request.headers
    target_url = prepared_request.url

    if 'Authorization' in request_headers and self.should_strip_auth(response.request.url, target_url):
        # Do not leak credentials to an endpoint that should not see them.
        del request_headers['Authorization']

    # .netrc may hold credentials for the new location.
    netrc_credentials = get_netrc_auth(target_url) if self.trust_env else None
    if netrc_credentials is not None:
        prepared_request.prepare_auth(netrc_credentials)
|
||||
|
||||
|
||||
def rebuild_proxies(self, prepared_request, proxies):
    """Recompute the proxy configuration for a redirected request.

    Unless the URL is to bypass proxies, and only when ``self.trust_env``
    is set, a proxy found in the environment for the URL's scheme (or the
    ``all`` entry) is added if that scheme has no proxy yet.

    The ``Proxy-Authorization`` header is always removed and is set again
    only when the proxy URL for the scheme carries a username and password.

    :param prepared_request: the follow-up request; its headers are
        modified in place.
    :param proxies: proxy mapping used so far, or ``None``.
    :rtype: dict
    """
    if proxies is None:
        proxies = {}
    request_headers = prepared_request.headers
    target_url = prepared_request.url
    scheme = urlparse(target_url).scheme
    new_proxies = proxies.copy()
    no_proxy = proxies.get('no_proxy')

    bypass_proxy = should_bypass_proxies(target_url, no_proxy=no_proxy)
    if self.trust_env and not bypass_proxy:
        environ_proxies = get_environ_proxies(target_url, no_proxy=no_proxy)
        env_proxy = environ_proxies.get(scheme, environ_proxies.get('all'))
        if env_proxy:
            # An explicitly configured proxy for this scheme wins.
            new_proxies.setdefault(scheme, env_proxy)

    # Stale proxy credentials must not travel with the new request.
    if 'Proxy-Authorization' in request_headers:
        del request_headers['Proxy-Authorization']

    try:
        username, password = get_auth_from_url(new_proxies[scheme])
    except KeyError:
        # No proxy configured for this scheme.
        username, password = None, None

    if username and password:
        request_headers['Proxy-Authorization'] = _basic_auth_str(username, password)

    return new_proxies
|
||||
|
||||
def rebuild_method(self, prepared_request, response):
    """Adjust the HTTP method of a redirected request.

    The method of ``prepared_request`` is rewritten in place according to
    the status code of ``response``, following the specs where they exist
    and browser behaviour where browsers deviate.
    """
    verb = prepared_request.method
    status = response.status_code

    # https://tools.ietf.org/html/rfc7231#section-6.4.4
    if status == codes.see_other and verb != 'HEAD':
        verb = 'GET'

    # Browser behaviour rather than the standard: a 302 becomes a GET too.
    if status == codes.found and verb != 'HEAD':
        verb = 'GET'

    # Likewise a POST answered with a 301 is retried as a GET
    # (explained in Issue 1704).
    if status == codes.moved and verb == 'POST':
        verb = 'GET'

    prepared_request.method = verb
|
||||
|
||||
|
||||
class Session(SessionRedirectMixin):
    """A Requests session.

    Provides cookie persistence, connection-pooling, and configuration.

    Basic Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> s.get('https://httpbin.org/get')
      <Response [200]>

    Or as a context manager::

      >>> with requests.Session() as s:
      ...     s.get('https://httpbin.org/get')
      <Response [200]>
    """

    # Attributes captured by __getstate__ when the session is pickled.
    __attrs__ = [
        'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
        'cert', 'prefetch', 'adapters', 'stream', 'trust_env',
        'max_redirects',
    ]

    def __init__(self):

        #: A case-insensitive dictionary of headers to be sent on each
        #: :class:`Request <Request>` sent from this
        #: :class:`Session <Session>`.
        self.headers = default_headers()

        #: Default Authentication tuple or object to attach to
        #: :class:`Request <Request>`.
        self.auth = None

        #: Dictionary mapping protocol or protocol and host to the URL of the proxy
        #: (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}) to
        #: be used on each :class:`Request <Request>`.
        self.proxies = {}

        #: Event-handling hooks.
        self.hooks = default_hooks()

        #: Dictionary of querystring data to attach to each
        #: :class:`Request <Request>`. The dictionary values may be lists for
        #: representing multivalued query parameters.
        self.params = {}

        #: Stream response content default.
        self.stream = False

        #: SSL Verification default.
        self.verify = True

        #: SSL client certificate default, if String, path to ssl client
        #: cert file (.pem). If Tuple, ('cert', 'key') pair.
        self.cert = None

        #: Maximum number of redirects allowed. If the request exceeds this
        #: limit, a :class:`TooManyRedirects` exception is raised.
        #: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is
        #: 30.
        self.max_redirects = DEFAULT_REDIRECT_LIMIT

        #: Trust environment settings for proxy configuration, default
        #: authentication and similar.
        self.trust_env = True

        #: A CookieJar containing all currently outstanding cookies set on this
        #: session. By default it is a
        #: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but
        #: may be any other ``cookielib.CookieJar`` compatible object.
        self.cookies = cookiejar_from_dict({})

        # Default connection adapters.
        self.adapters = OrderedDict()
        self.mount('https://', HTTPAdapter())
        self.mount('http://', HTTPAdapter())

    def __enter__(self):
        """Return the session itself for use in a ``with`` block."""
        return self

    def __exit__(self, *args):
        """Close the session when the ``with`` block ends."""
        self.close()

    def prepare_request(self, request):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for
        transmission and returns it. The :class:`PreparedRequest` has settings
        merged from the :class:`Request <Request>` instance and those of the
        :class:`Session`.

        :param request: :class:`Request` instance to prepare with this
            session's settings.
        :rtype: requests.PreparedRequest
        """
        cookies = request.cookies or {}

        # Bootstrap CookieJar.
        if not isinstance(cookies, cookielib.CookieJar):
            cookies = cookiejar_from_dict(cookies)

        # Merge with session cookies; request cookies are merged last.
        merged_cookies = merge_cookies(
            merge_cookies(RequestsCookieJar(), self.cookies), cookies)

        # Set environment's basic authentication if not explicitly set.
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        p = PreparedRequest()
        p.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
            params=merge_setting(request.params, self.params),
            auth=merge_setting(auth, self.auth),
            cookies=merged_cookies,
            hooks=merge_hooks(request.hooks, self.hooks),
        )
        return p

    def request(self, method, url,
                params=None, data=None, headers=None, cookies=None, files=None,
                auth=None, timeout=None, allow_redirects=True, proxies=None,
                hooks=None, stream=None, verify=None, cert=None, json=None):
        """Constructs a :class:`Request <Request>`, prepares it and sends it.
        Returns :class:`Response <Response>` object.

        :param method: method for the new :class:`Request` object.
        :param url: URL for the new :class:`Request` object.
        :param params: (optional) Dictionary or bytes to be sent in the query
            string for the :class:`Request`.
        :param data: (optional) Dictionary, list of tuples, bytes, or file-like
            object to send in the body of the :class:`Request`.
        :param json: (optional) json to send in the body of the
            :class:`Request`.
        :param headers: (optional) Dictionary of HTTP Headers to send with the
            :class:`Request`.
        :param cookies: (optional) Dict or CookieJar object to send with the
            :class:`Request`.
        :param files: (optional) Dictionary of ``'filename': file-like-objects``
            for multipart encoding upload.
        :param auth: (optional) Auth tuple or callable to enable
            Basic/Digest/Custom HTTP Auth.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param allow_redirects: (optional) Set to True by default.
        :type allow_redirects: bool
        :param proxies: (optional) Dictionary mapping protocol or protocol and
            hostname to the URL of the proxy.
        :param stream: (optional) whether to immediately download the response
            content. Defaults to ``False``.
        :param verify: (optional) Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use. Defaults to ``True``.
        :param cert: (optional) if String, path to ssl client cert file (.pem).
            If Tuple, ('cert', 'key') pair.
        :rtype: requests.Response
        """
        # Create the Request.
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self.prepare_request(req)

        proxies = proxies or {}

        settings = self.merge_environment_settings(
            prep.url, proxies, stream, verify, cert
        )

        # Send the request.
        send_kwargs = {
            'timeout': timeout,
            'allow_redirects': allow_redirects,
        }
        send_kwargs.update(settings)
        resp = self.send(prep, **send_kwargs)

        return resp

    def get(self, url, **kwargs):
        r"""Sends a GET request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', True)
        return self.request('GET', url, **kwargs)

    def options(self, url, **kwargs):
        r"""Sends a OPTIONS request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', True)
        return self.request('OPTIONS', url, **kwargs)

    def head(self, url, **kwargs):
        r"""Sends a HEAD request. Returns :class:`Response` object.

        Unlike the other helpers, redirects are not followed unless
        ``allow_redirects`` is passed explicitly.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        kwargs.setdefault('allow_redirects', False)
        return self.request('HEAD', url, **kwargs)

    def post(self, url, data=None, json=None, **kwargs):
        r"""Sends a POST request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, list of tuples, bytes, or file-like
            object to send in the body of the :class:`Request`.
        :param json: (optional) json to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('POST', url, data=data, json=json, **kwargs)

    def put(self, url, data=None, **kwargs):
        r"""Sends a PUT request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, list of tuples, bytes, or file-like
            object to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('PUT', url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        r"""Sends a PATCH request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param data: (optional) Dictionary, list of tuples, bytes, or file-like
            object to send in the body of the :class:`Request`.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('PATCH', url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        r"""Sends a DELETE request. Returns :class:`Response` object.

        :param url: URL for the new :class:`Request` object.
        :param \*\*kwargs: Optional arguments that ``request`` takes.
        :rtype: requests.Response
        """

        return self.request('DELETE', url, **kwargs)

    def send(self, request, **kwargs):
        """Send a given PreparedRequest.

        :param request: the prepared request to transmit.
        :param kwargs: adapter options; ``stream``, ``verify``, ``cert`` and
            ``proxies`` default to the session's values, and
            ``allow_redirects`` (default ``True``) is consumed here.
        :raises ValueError: if an unprepared ``Request`` is passed.
        :rtype: requests.Response
        """
        # Set defaults that the hooks can utilize to ensure they always have
        # the correct parameters to reproduce the previous request.
        kwargs.setdefault('stream', self.stream)
        kwargs.setdefault('verify', self.verify)
        kwargs.setdefault('cert', self.cert)
        kwargs.setdefault('proxies', self.proxies)

        # It's possible that users might accidentally send a Request object.
        # Guard against that specific failure case.
        if isinstance(request, Request):
            raise ValueError('You can only send PreparedRequests.')

        # Set up variables needed for resolve_redirects and dispatching of hooks
        allow_redirects = kwargs.pop('allow_redirects', True)
        stream = kwargs.get('stream')
        hooks = request.hooks

        # Get the appropriate adapter to use
        adapter = self.get_adapter(url=request.url)

        # Start time (approximately) of the request
        start = preferred_clock()

        # Send the request
        r = adapter.send(request, **kwargs)

        # Total elapsed time of the request (approximately)
        elapsed = preferred_clock() - start
        r.elapsed = timedelta(seconds=elapsed)

        # Response manipulation hooks
        r = dispatch_hook('response', hooks, r, **kwargs)

        # Persist cookies
        if r.history:

            # If the hooks create history then we want those cookies too
            for resp in r.history:
                extract_cookies_to_jar(self.cookies, resp.request, resp.raw)

        extract_cookies_to_jar(self.cookies, request, r.raw)

        # Redirect resolving generator.
        gen = self.resolve_redirects(r, request, **kwargs)

        # Resolve redirects if allowed; the generator is only consumed then.
        history = list(gen) if allow_redirects else []

        # Shuffle things around if there's history.
        if history:
            # Insert the first (original) request at the start
            history.insert(0, r)
            # Get the last request made
            r = history.pop()
            r.history = history

        # If redirects aren't being followed, store the response on the Request for Response.next().
        if not allow_redirects:
            try:
                r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs))
            except StopIteration:
                # Not a redirect: nothing to store.
                pass

        if not stream:
            # Accessing ``content`` reads the body now.
            r.content

        return r

    def merge_environment_settings(self, url, proxies, stream, verify, cert):
        """
        Check the environment and merge it with some settings.

        When ``self.trust_env`` is set, proxies found in the environment are
        added to ``proxies`` (existing keys win) and, if ``verify`` is
        ``True`` or ``None``, it is replaced by ``REQUESTS_CA_BUNDLE`` or
        ``CURL_CA_BUNDLE`` from the environment. Every value is then merged
        with the session-level setting.

        :param proxies: per-request proxy mapping; may be ``None``. A mapping
            passed in is updated in place with the environment proxies.
        :rtype: dict
        """
        # Gather clues from the surrounding environment.
        if self.trust_env:
            # Set environment's proxies.
            no_proxy = proxies.get('no_proxy') if proxies is not None else None
            env_proxies = get_environ_proxies(url, no_proxy=no_proxy)
            if env_proxies and proxies is None:
                # ``proxies=None`` is tolerated by the lookup above, so it
                # must not fail on ``setdefault`` below either.
                proxies = {}
            for (k, v) in env_proxies.items():
                proxies.setdefault(k, v)

            # Look for requests environment configuration and be compatible
            # with cURL.
            if verify is True or verify is None:
                verify = (os.environ.get('REQUESTS_CA_BUNDLE') or
                          os.environ.get('CURL_CA_BUNDLE'))

        # Merge all the kwargs.
        proxies = merge_setting(proxies, self.proxies)
        stream = merge_setting(stream, self.stream)
        verify = merge_setting(verify, self.verify)
        cert = merge_setting(cert, self.cert)

        return {'verify': verify, 'proxies': proxies, 'stream': stream,
                'cert': cert}

    def get_adapter(self, url):
        """
        Returns the appropriate connection adapter for the given URL.

        The first mounted prefix that ``url`` starts with (compared
        case-insensitively) wins.

        :raises InvalidSchema: if no mounted prefix matches ``url``.
        :rtype: requests.adapters.BaseAdapter
        """
        for (prefix, adapter) in self.adapters.items():

            if url.lower().startswith(prefix.lower()):
                return adapter

        # Nothing matches :-/
        raise InvalidSchema("No connection adapters were found for {!r}".format(url))

    def close(self):
        """Closes all adapters and as such the session"""
        for v in self.adapters.values():
            v.close()

    def mount(self, prefix, adapter):
        """Registers a connection adapter to a prefix.

        Adapters are sorted in descending order by prefix length.
        """
        self.adapters[prefix] = adapter
        # Re-insert every shorter prefix so it ends up after the new one.
        keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]

        for key in keys_to_move:
            self.adapters[key] = self.adapters.pop(key)

    def __getstate__(self):
        """Return the ``__attrs__`` values as a dict; missing ones are ``None``."""
        state = {attr: getattr(self, attr, None) for attr in self.__attrs__}
        return state

    def __setstate__(self, state):
        """Restore every attribute stored in ``state``."""
        for attr, value in state.items():
            setattr(self, attr, value)
|
||||
|
||||
|
||||
def session():
    """
    Create and return a new :class:`Session`.

    .. deprecated:: 1.0.0

        Kept only for backwards compatibility since version 1.0.0 and may be
        removed at a future date. New code should instantiate
        :class:`~requests.sessions.Session` directly.

    :rtype: Session
    """
    return Session()
|
|
@ -0,0 +1,123 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
r"""
|
||||
The ``codes`` object defines a mapping from common names for HTTP statuses
|
||||
to their numerical codes, accessible either as attributes or as dictionary
|
||||
items.
|
||||
|
||||
Example::
|
||||
|
||||
>>> import requests
|
||||
>>> requests.codes['temporary_redirect']
|
||||
307
|
||||
>>> requests.codes.teapot
|
||||
418
|
||||
>>> requests.codes['\o/']
|
||||
200
|
||||
|
||||
Some codes have multiple names, and both upper- and lower-case versions of
|
||||
the names are allowed. For example, ``codes.ok``, ``codes.OK``, and
|
||||
``codes.okay`` all correspond to the HTTP status code 200.
|
||||
"""
|
||||
|
||||
from .structures import LookupDict
|
||||
|
||||
_codes = {
|
||||
|
||||
# Informational.
|
||||
100: ('continue',),
|
||||
101: ('switching_protocols',),
|
||||
102: ('processing',),
|
||||
103: ('checkpoint',),
|
||||
122: ('uri_too_long', 'request_uri_too_long'),
|
||||
200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
|
||||
201: ('created',),
|
||||
202: ('accepted',),
|
||||
203: ('non_authoritative_info', 'non_authoritative_information'),
|
||||
204: ('no_content',),
|
||||
205: ('reset_content', 'reset'),
|
||||
206: ('partial_content', 'partial'),
|
||||
207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
|
||||
208: ('already_reported',),
|
||||
226: ('im_used',),
|
||||
|
||||
# Redirection.
|
||||
300: ('multiple_choices',),
|
||||
301: ('moved_permanently', 'moved', '\\o-'),
|
||||
302: ('found',),
|
||||
303: ('see_other', 'other'),
|
||||
304: ('not_modified',),
|
||||
305: ('use_proxy',),
|
||||
306: ('switch_proxy',),
|
||||
307: ('temporary_redirect', 'temporary_moved', 'temporary'),
|
||||
308: ('permanent_redirect',
|
||||
'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
|
||||
|
||||
# Client Error.
|
||||
400: ('bad_request', 'bad'),
|
||||
401: ('unauthorized',),
|
||||
402: ('payment_required', 'payment'),
|
||||
403: ('forbidden',),
|
||||
404: ('not_found', '-o-'),
|
||||
405: ('method_not_allowed', 'not_allowed'),
|
||||
406: ('not_acceptable',),
|
||||
407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
|
||||
408: ('request_timeout', 'timeout'),
|
||||
409: ('conflict',),
|
||||
410: ('gone',),
|
||||
411: ('length_required',),
|
||||
412: ('precondition_failed', 'precondition'),
|
||||
413: ('request_entity_too_large',),
|
||||
414: ('request_uri_too_large',),
|
||||
415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
|
||||
416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
|
||||
417: ('expectation_failed',),
|
||||
418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
|
||||
421: ('misdirected_request',),
|
||||
422: ('unprocessable_entity', 'unprocessable'),
|
||||
423: ('locked',),
|
||||
424: ('failed_dependency', 'dependency'),
|
||||
425: ('unordered_collection', 'unordered'),
|
||||
426: ('upgrade_required', 'upgrade'),
|
||||
428: ('precondition_required', 'precondition'),
|
||||
429: ('too_many_requests', 'too_many'),
|
||||
431: ('header_fields_too_large', 'fields_too_large'),
|
||||
444: ('no_response', 'none'),
|
||||
449: ('retry_with', 'retry'),
|
||||
450: ('blocked_by_windows_parental_controls', 'parental_controls'),
|
||||
451: ('unavailable_for_legal_reasons', 'legal_reasons'),
|
||||
499: ('client_closed_request',),
|
||||
|
||||
# Server Error.
|
||||
500: ('internal_server_error', 'server_error', '/o\\', '✗'),
|
||||
501: ('not_implemented',),
|
||||
502: ('bad_gateway',),
|
||||
503: ('service_unavailable', 'unavailable'),
|
||||
504: ('gateway_timeout',),
|
||||
505: ('http_version_not_supported', 'http_version'),
|
||||
506: ('variant_also_negotiates',),
|
||||
507: ('insufficient_storage',),
|
||||
509: ('bandwidth_limit_exceeded', 'bandwidth'),
|
||||
510: ('not_extended',),
|
||||
511: ('network_authentication_required', 'network_auth', 'network_authentication'),
|
||||
}
|
||||
|
||||
# Lookup object that receives one attribute per status name.
codes = LookupDict(name='status_codes')


def _init():
    """Populate ``codes`` from ``_codes`` and extend the module docstring.

    Every name becomes an attribute of ``codes``; names that do not start
    with a backslash or a slash are registered in upper case as well. A
    bullet list of all codes and their names is then appended to the module
    docstring, if the module has one.
    """
    global __doc__

    for status, aliases in _codes.items():
        for alias in aliases:
            setattr(codes, alias, status)
            if alias.startswith(('\\', '/')):
                continue
            setattr(codes, alias.upper(), status)

    # The docstring can be None (presumably when docstrings are stripped).
    if __doc__ is not None:
        listing = '\n'.join(
            '* %d: %s' % (status, ', '.join('``%s``' % alias for alias in _codes[status]))
            for status in sorted(_codes)
        )
        __doc__ = __doc__ + '\n' + listing


_init()
|
|
@ -0,0 +1,105 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.structures
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Data structures that power Requests.
|
||||
"""
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from .compat import Mapping, MutableMapping
|
||||
|
||||
|
||||
class CaseInsensitiveDict(MutableMapping):
    """A ``dict``-like mapping whose string keys are matched without case.

    All ``MutableMapping`` methods and operations are available, plus
    dict's ``copy`` and an extra ``lower_items``.

    Keys are expected to be strings. The spelling of the most recently set
    key is remembered, so ``iter(instance)``, ``keys()``, ``items()``,
    ``iterkeys()`` and ``iteritems()`` report keys in their original case,
    while lookups and membership tests ignore case::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For instance ``headers['content-encoding']`` yields the value of a
    ``'Content-Encoding'`` response header no matter how the header name
    was stored.

    Behaviour is undefined when the constructor, ``.update`` or an equality
    comparison receives keys that have equal ``.lower()``s.
    """

    def __init__(self, data=None, **kwargs):
        # lower-cased key -> (key as last written, value)
        self._store = OrderedDict()
        self.update({} if data is None else data, **kwargs)

    def __setitem__(self, key, value):
        # Index by the lower-cased key, but keep the caller's spelling
        # next to the value.
        original_and_value = (key, value)
        self._store[key.lower()] = original_and_value

    def __getitem__(self, key):
        entry = self._store[key.lower()]
        return entry[1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (original for original, _value in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowered, entry[1])
            for (lowered, entry)
            in self._store.items()
        )

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        # Normalise the other mapping so both sides compare on lower keys.
        other = CaseInsensitiveDict(other)
        return dict(self.lower_items()) == dict(other.lower_items())

    # MutableMapping does not provide copy().
    def copy(self):
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))
|
||||
|
||||
|
||||
class LookupDict(dict):
    """Dictionary lookup object.

    Item access and ``get`` read from the instance's attributes rather
    than from the dict contents.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # Deliberately forgiving: an unknown key gives None, not KeyError.
        attributes = self.__dict__
        return attributes.get(key, None)

    def get(self, key, default=None):
        attributes = self.__dict__
        return attributes.get(key, default)
|
|
@ -0,0 +1,982 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.utils
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
This module provides utility functions that are used within Requests
|
||||
that are also useful for external consumption.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import contextlib
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import struct
|
||||
import sys
|
||||
import tempfile
|
||||
import warnings
|
||||
import zipfile
|
||||
from collections import OrderedDict
|
||||
|
||||
from .__version__ import __version__
|
||||
from . import certs
|
||||
# to_native_string is unused here, but imported here for backwards compatibility
|
||||
from ._internal_utils import to_native_string
|
||||
from .compat import parse_http_list as _parse_list_header
|
||||
from .compat import (
|
||||
quote, urlparse, bytes, str, unquote, getproxies,
|
||||
proxy_bypass, urlunparse, basestring, integer_types, is_py3,
|
||||
proxy_bypass_environment, getproxies_environment, Mapping)
|
||||
from .cookies import cookiejar_from_dict
|
||||
from .structures import CaseInsensitiveDict
|
||||
from .exceptions import (
|
||||
InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)
|
||||
|
||||
# File names looked up in the user's home directory when searching for netrc
# credentials.
NETRC_FILES = ('.netrc', '_netrc')

# CA bundle location as reported by the ``certs`` module.
DEFAULT_CA_BUNDLE_PATH = certs.where()

# Port number associated with each URL scheme listed here.
DEFAULT_PORTS = {'http': 80, 'https': 443}
|
||||
|
||||
|
||||
if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        """Tell whether ``host`` matches the proxy override list in the registry.

        Reads ``ProxyEnable`` and ``ProxyOverride`` from the current user's
        "Internet Settings" key. Returns ``False`` when ``winreg`` cannot be
        imported, the registry read raises ``OSError``, either value is
        falsy, or no override entry matches.

        :rtype: bool
        """
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        settings_path = r'Software\Microsoft\Windows\CurrentVersion\Internet Settings'
        try:
            settings_key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, settings_path)
            # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it
            enabled = int(winreg.QueryValueEx(settings_key, 'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            overrides = winreg.QueryValueEx(settings_key, 'ProxyOverride')[0]
        except OSError:
            return False
        if not (enabled and overrides):
            return False

        # The override value is a ';'-separated list of glob-style entries;
        # check the host against each of them in turn.
        for entry in overrides.split(';'):
            # '<local>' stands for any host name without a dot in it.
            if entry == '<local>' and '.' not in host:
                return True
            pattern = (
                entry.replace(".", r"\.")  # mask dots
                .replace("*", r".*")  # change glob sequence
                .replace("?", r".")  # change glob char
            )
            if re.match(pattern, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True, if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        return proxy_bypass_registry(host)
|
||||
|
||||
|
||||
def dict_to_sequence(d):
    """Return ``d.items()`` when ``d`` has an ``items`` attribute, else ``d`` itself."""
    return d.items() if hasattr(d, 'items') else d
|
||||
|
||||
|
||||
def super_len(o):
    """Return how many units of ``o`` remain after its current position.

    The total size comes from, in order of preference: ``len(o)``, an
    ``o.len`` attribute, ``os.fstat`` on ``o.fileno()``, or seeking to the
    end of ``o``. The current position (``o.tell()``, when available) is
    subtracted and the result is never negative. Objects offering none of
    these yield ``0``.
    """
    size = None
    offset = 0

    if hasattr(o, '__len__'):
        size = len(o)

    elif hasattr(o, 'len'):
        size = o.len

    elif hasattr(o, 'fileno'):
        try:
            descriptor = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            size = os.fstat(descriptor).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            offset = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if size is not None:
                offset = size
        else:
            if size is None and hasattr(o, 'seek'):
                # StringIO and BytesIO have seek but no useable fileno
                try:
                    # seek to end of file
                    o.seek(0, os.SEEK_END)
                    size = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(offset or 0)
                except (OSError, IOError):
                    size = 0

    if size is None:
        size = 0

    remaining = size - offset
    return max(0, remaining)
|
||||
|
||||
|
||||
def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc.

    :param url: URL whose host is looked up in the netrc file.
    :param raise_errors: when true, re-raise netrc parse errors and
        ``IOError`` raised while reading the file instead of ignoring them.
    :return: ``(login, password)`` for the URL's host, or ``None`` when no
        netrc file exists, the host has no entry, or the lookup failed and
        ``raise_errors`` is false.
    """

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        # Use the parsed hostname rather than cutting netloc at the first
        # ':'. For a netloc such as "user:pw@host:8080" (or the crafted
        # "trusted.example:@evil.example") the naive split yields the
        # userinfo part, so credentials for the wrong machine could be
        # selected and sent to a different host.
        host = urlparse(url).hostname

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
|
||||
|
||||
|
||||
def guess_filename(obj):
    """Tries to guess the filename of the given object.

    Returns the base name of ``obj.name`` when that attribute is a
    non-empty string that neither starts with ``<`` nor ends with ``>``;
    otherwise returns ``None``.
    """
    candidate = getattr(obj, 'name', None)
    if not candidate or not isinstance(candidate, basestring):
        return None
    if candidate[0] == '<' or candidate[-1] == '>':
        return None
    return os.path.basename(candidate)
|
||||
|
||||
|
||||
def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.

    :param path: filesystem path, possibly pointing inside a zip archive.
    :return: ``path`` itself when it exists, is not inside a zip archive, or
        names a member the archive does not contain; otherwise the path of
        the member extracted under the system temporary directory.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        if not prefix:
            # os.path.split() left ``archive`` unchanged (for example a
            # filesystem root that does not exist); without this check the
            # loop would never terminate.
            break
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    # Open the archive in a ``with`` block so its file handle is always closed.
    with zipfile.ZipFile(archive) as zip_file:
        if member not in zip_file.namelist():
            return path

        # we have a valid zip archive and a valid member of that archive
        tmp = tempfile.gettempdir()
        extracted_path = os.path.join(tmp, *member.split('/'))
        if not os.path.exists(extracted_path):
            extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path
|
||||
|
||||
|
||||
def from_key_val_list(value):
    """Build an ``OrderedDict`` from a mapping or an iterable of 2-tuples.

    ``None`` is passed through unchanged; strings, bytes, booleans and
    integers are rejected with ``ValueError``.

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    scalar_types = (str, bytes, bool, int)
    if isinstance(value, scalar_types):
        raise ValueError('cannot encode objects that are not 2-tuples')

    ordered = OrderedDict(value)
    return ordered
|
||||
|
||||
|
||||
def to_key_val_list(value):
    """Turn a mapping or an iterable of 2-tuples into a list of tuples.

    ``None`` is passed through unchanged; strings, bytes, booleans and
    integers are rejected with ``ValueError``.

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    scalar_types = (str, bytes, bool, int)
    if isinstance(value, scalar_types):
        raise ValueError('cannot encode objects that are not 2-tuples')

    pairs = value.items() if isinstance(value, Mapping) else value
    return list(pairs)
|
||||
|
||||
|
||||
# From mitsuhiko/werkzeug (used with permission).
|
||||
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    Splits a comma-separated header value into its elements. Elements may
    be quoted-strings (which can themselves contain commas); an element
    wrapped in double quotes has the quotes removed and its content
    unquoted. Order, duplicates and case are preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    def _strip_quotes(element):
        # Only elements that both start and end with '"' are unquoted.
        if element[:1] == element[-1:] == '"':
            return unquote_header_value(element[1:-1])
        return element

    return [_strip_quotes(element) for element in _parse_list_header(value)]
|
||||
|
||||
|
||||
# From mitsuhiko/werkzeug (used with permission).
|
||||
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    parsed = {}
    for element in _parse_list_header(value):
        name, separator, raw = element.partition('=')
        if not separator:
            # No '=' at all: the whole element is a bare key.
            parsed[element] = None
            continue
        if raw[:1] == raw[-1:] == '"':
            raw = unquote_header_value(raw[1:-1])
        parsed[name] = raw
    return parsed
|
||||
|
||||
|
||||
# From mitsuhiko/werkzeug (used with permission).
|
||||
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers are actually
    using for quoting.

    A value that is not wrapped in double quotes is returned unchanged.

    :param value: the header value to unquote.
    :param is_filename: when true, a value whose content starts with two
        backslashes (a UNC path) only has its surrounding quotes removed.
    :rtype: str
    """
    if not (value and value[0] == value[-1] == '"'):
        return value

    # this is not the real unquoting, but fixing this so that the
    # RFC is met will result in bugs with internet explorer and
    # probably some other browsers as well. IE for example is
    # uploading files with "C:\foo\bar.txt" as filename
    inner = value[1:-1]

    # if this is a filename and the starting characters look like
    # a UNC path, then just return the value without quotes. Using the
    # replace sequence below on a UNC path has the effect of turning
    # the leading double slash into a single slash and then
    # _fix_ie_filename() doesn't work correctly. See #458.
    if is_filename and inner[:2] == '\\\\':
        return inner
    return inner.replace('\\\\', '\\').replace('\\"', '"')
|
||||
|
||||
|
||||
def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    Later cookies overwrite earlier ones that share the same name.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """
    return {cookie.name: cookie.value for cookie in cj}
|
||||
|
||||
|
||||
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert the entries of ``cookie_dict`` into ``cj``.

    Delegates to ``cookiejar_from_dict(cookie_dict, cj)`` and returns its
    result.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """
    updated_jar = cookiejar_from_dict(cookie_dict, cj)
    return updated_jar
|
||||
|
||||
|
||||
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    Emits a ``DeprecationWarning`` on every call. The result lists the
    matches of three patterns in this order: ``<meta ... charset=``,
    ``<meta ... content=...charset=`` and the ``<?xml ... encoding=``
    declaration.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    patterns = (
        re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I),
        re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I),
        re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'),
    )

    found = []
    for pattern in patterns:
        found.extend(pattern.findall(content))
    return found
|
||||
|
||||
|
||||
def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    Parameter names are lower-cased; names and values have surrounding
    quotes and spaces removed. A parameter without ``=`` maps to ``True``.

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """
    pieces = header.split(';')
    content_type = pieces[0].strip()
    strip_chars = "\"' "

    params_dict = {}
    for piece in pieces[1:]:
        piece = piece.strip()
        if not piece:
            continue
        name, separator, raw = piece.partition("=")
        if separator:
            key = name.strip(strip_chars)
            value = raw.strip(strip_chars)
        else:
            key, value = piece, True
        params_dict[key.lower()] = value
    return content_type, params_dict
|
||||
|
||||
|
||||
def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    Uses the ``charset`` parameter of the ``content-type`` header when
    present, falls back to ``'ISO-8859-1'`` for content types containing
    ``text``, and otherwise returns ``None``.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """
    raw_content_type = headers.get('content-type')
    if not raw_content_type:
        return None

    media_type, params = _parse_content_type_header(raw_content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in media_type:
        return 'ISO-8859-1'

    return None
|
||||
|
||||
|
||||
def stream_decode_response_unicode(iterator, r):
    """Stream decodes a iterator.

    When ``r.encoding`` is ``None`` the items are passed through untouched.
    Otherwise each chunk is fed to an incremental decoder for that encoding
    (undecodable input is replaced) and every non-empty piece of decoded
    text is yielded, including whatever the decoder flushes at the end.
    """
    encoding = r.encoding
    if encoding is None:
        for item in iterator:
            yield item
        return

    decoder_factory = codecs.getincrementaldecoder(encoding)
    decoder = decoder_factory(errors='replace')
    for chunk in iterator:
        text = decoder.decode(chunk)
        if text:
            yield text

    # Flush anything the decoder is still buffering.
    tail = decoder.decode(b'', final=True)
    if tail:
        yield tail
|
||||
|
||||
|
||||
def iter_slices(string, slice_length):
    """Iterate over slices of a string.

    Yields consecutive pieces of at most ``slice_length`` items. When
    ``slice_length`` is ``None`` or not positive the whole input is yielded
    as a single slice; an empty input yields nothing.
    """
    total = len(string)
    step = slice_length
    if step is None or step <= 0:
        step = total
    if total == 0:
        # Nothing to slice (and ``step`` may be zero here).
        return
    for start in range(0, total, step):
        yield string[start:start + step]
|
||||
|
||||
|
||||
def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    Emits a ``DeprecationWarning`` on every call.

    :param r: Response object to get unicode content from.

    Tried:

    1. charset from content-type
    2. fall back and replace all unicode characters

    If the fallback decoding raises ``TypeError``, ``r.content`` is
    returned as is.

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            # Strict decoding failed; continue with the lenient fallback.
            pass

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content
|
||||
|
||||
|
||||
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    Raises ``InvalidURL`` when a ``%`` is followed by two alphanumeric
    characters that are not valid hexadecimal.

    :rtype: str
    """
    segments = uri.split('%')
    # Everything before the first '%' is copied through untouched.
    rebuilt = [segments[0]]
    for segment in segments[1:]:
        hex_pair = segment[:2]
        if len(hex_pair) == 2 and hex_pair.isalnum():
            try:
                char = chr(int(hex_pair, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % hex_pair)

            if char in UNRESERVED_SET:
                rebuilt.append(char + segment[2:])
                continue
        # Not an escape of an unreserved character: restore the '%'.
        rebuilt.append('%' + segment)
    return ''.join(rebuilt)
|
||||
|
||||
|
||||
def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        normalized = unquote_unreserved(uri)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
    # Then quote only illegal characters (do not quote reserved,
    # unreserved, or '%')
    return quote(normalized, safe=safe_with_percent)
|
||||
|
||||
|
||||
def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :param ip: dotted IPv4 address.
    :param net: network written as ``address/prefix-length``.
    :rtype: bool
    """
    def _to_int(dotted):
        # Pack the dotted address and read it back as one unsigned integer.
        return struct.unpack('=L', socket.inet_aton(dotted))[0]

    ip_value = _to_int(ip)
    net_address, prefix_length = net.split('/')
    mask_value = _to_int(dotted_netmask(int(prefix_length)))
    net_value = _to_int(net_address) & mask_value
    return (ip_value & mask_value) == (net_value & mask_value)
|
||||
|
||||
|
||||
def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    host_bit_count = 32 - mask
    # All-ones over the host part, flipped against a full 32-bit mask.
    host_part = (1 << host_bit_count) - 1
    bits = 0xffffffff ^ host_part
    packed = struct.pack('>I', bits)
    return socket.inet_ntoa(packed)
|
||||
|
||||
|
||||
def is_ipv4_address(string_ip):
    """Report whether ``socket.inet_aton`` accepts ``string_ip``.

    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    else:
        return True
|
||||
|
||||
|
||||
def is_valid_cidr(string_network):
    """
    Very simple check of the cidr format in no_proxy variable.

    Accepts exactly one ``/``, an integer prefix length from 1 to 32 and
    an address part that ``socket.inet_aton`` can parse.

    :rtype: bool
    """
    if string_network.count('/') != 1:
        return False

    address, prefix_text = string_network.split('/')

    try:
        mask = int(prefix_text)
    except ValueError:
        return False

    if not 1 <= mask <= 32:
        return False

    try:
        socket.inet_aton(address)
    except socket.error:
        return False
    return True
|
||||
|
||||
|
||||
@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name' (removing the variable again when it
    was not set before).

    If 'value' is None, do nothing"""
    if value is None:
        yield
        return

    previous = os.environ.get(env_name)
    os.environ[env_name] = value
    try:
        yield
    finally:
        if previous is None:
            del os.environ[env_name]
        else:
            os.environ[env_name] = previous
|
||||
|
||||
|
||||
def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :param url: URL about to be requested.
    :param no_proxy: comma-separated list of hosts, IPs or CIDR networks to
        exclude from proxying; when ``None`` the ``no_proxy`` / ``NO_PROXY``
        environment variable is used instead.
    :rtype: bool
    """
    no_proxy_arg = no_proxy

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    if no_proxy is None:
        # Prioritize lowercase environment variables over uppercase
        # to keep a consistent behaviour with other http projects (curl, wget).
        no_proxy = os.environ.get('no_proxy') or os.environ.get('NO_PROXY')

    parsed = urlparse(url)
    hostname = parsed.hostname

    if hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        entries = [
            entry for entry in no_proxy.replace(' ', '').split(',') if entry
        ]

        if is_ipv4_address(hostname):
            for entry in entries:
                if is_valid_cidr(entry):
                    if address_in_network(hostname, entry):
                        return True
                elif hostname == entry:
                    # If no_proxy ip was defined in plain IP notation instead of cidr notation &
                    # matches the IP of the index
                    return True
        else:
            host_with_port = hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for entry in entries:
                if hostname.endswith(entry) or host_with_port.endswith(entry):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # Expose the caller-supplied no_proxy value (if any) through the
    # environment while the platform bypass check runs.
    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    return bool(bypass)
|
||||
|
||||
|
||||
def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    The dict is empty when ``should_bypass_proxies`` says ``url`` must not
    go through a proxy.

    :rtype: dict
    """
    bypass = should_bypass_proxies(url, no_proxy=no_proxy)
    return {} if bypass else getproxies()
|
||||
|
||||
|
||||
def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    Candidate keys are tried from most to least specific:
    ``scheme://host``, ``scheme``, ``all://host`` and ``all``. For a URL
    without a hostname only ``scheme`` and ``all`` are consulted.

    :param url: The url being for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    :return: the first matching proxy URL, or ``None`` when nothing matches.
    """
    proxies = proxies or {}
    parts = urlparse(url)
    scheme, hostname = parts.scheme, parts.hostname

    if hostname is None:
        return proxies.get(scheme, proxies.get('all'))

    candidates = (
        scheme + '://' + hostname,
        scheme,
        'all://' + hostname,
        'all',
    )
    for candidate in candidates:
        if candidate in proxies:
            return proxies[candidate]
    return None
|
||||
|
||||
|
||||
def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    The result has the form ``<name>/<version>`` where the version is this
    package's ``__version__``.

    :rtype: str
    """
    return '{}/{}'.format(name, __version__)
|
||||
|
||||
|
||||
def default_headers():
    """
    Build the default header set: ``User-Agent``, ``Accept-Encoding``,
    ``Accept`` and ``Connection``.

    :rtype: requests.structures.CaseInsensitiveDict
    """
    accepted_encodings = ('gzip', 'deflate')
    headers = {
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(accepted_encodings),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    return CaseInsensitiveDict(headers)
|
||||
|
||||
|
||||
def parse_header_links(value):
    """Return a list of dicts parsed from a ``Link`` header value.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    Each dict holds the link target under ``'url'`` plus one entry per
    ``key=value`` parameter. Parameter parsing for a link stops at the first
    parameter that does not contain exactly one ``=``.

    :rtype: list
    """
    links = []
    replace_chars = ' \'"'

    header = value.strip(replace_chars)
    if not header:
        return links

    for chunk in re.split(', *<', header):
        # Without a ';' the whole chunk is the URL and there are no params.
        url, _, params = chunk.partition(';')

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            fields = param.split('=')
            if len(fields) != 2:
                break
            key, raw = fields
            link[key.strip(replace_chars)] = raw.strip(replace_chars)

        links.append(link)

    return links
|
||||
|
||||
|
||||
# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null + _null
_null3 = _null2 + _null


def guess_json_utf(data):
    """
    Guess the UTF flavour of JSON ``data`` from its first four bytes.

    Returns ``None`` when the pattern of null bytes matches none of the
    known encodings.

    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    head = data[:4]

    # BOM checks first; UTF-32 before UTF-16 because the UTF-32-LE BOM
    # starts with the UTF-16-LE one.
    if head in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if head[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included

    null_total = head.count(_null)
    if null_total == 0:
        return 'utf-8'
    if null_total == 2:
        if head[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if head[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    elif null_total == 3:
        if head[:3] == _null3:
            return 'utf-32-be'
        if head[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
|
||||
|
||||
|
||||
def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    parsed = urlparse(url, new_scheme)
    netloc, path = parsed.netloc, parsed.path

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    components = (parsed.scheme, netloc, path,
                  parsed.params, parsed.query, parsed.fragment)
    return urlunparse(components)
|
||||
|
||||
|
||||
def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple of
    username,password.

    Both parts are percent-decoded. ``('', '')`` is returned when unquoting
    the username or password raises ``AttributeError`` or ``TypeError``.

    :rtype: (str,str)
    """
    parsed = urlparse(url)

    try:
        return (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        return ('', '')
|
||||
|
||||
|
||||
# Moved outside of function to avoid recompile every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    Raises ``InvalidHeader`` when the value fails the check or when
    matching it raises ``TypeError``.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    # Use the pattern compiled for the value's own type.
    pattern = (_CLEAN_HEADER_REGEX_BYTE if isinstance(value, bytes)
               else _CLEAN_HEADER_REGEX_STR)
    try:
        matched = pattern.match(value)
        if not matched:
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))
|
||||
|
||||
|
||||
def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    parsed = urlparse(url)
    netloc, path = parsed.netloc, parsed.path

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    # Keep only what follows the last '@', dropping any "user:password@".
    host_part = netloc.rsplit('@', 1)[-1]

    # The fragment slot is deliberately left empty.
    return urlunparse(
        (parsed.scheme, host_part, path, parsed.params, parsed.query, ''))
|
||||
|
||||
|
||||
def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.

    Raises ``UnrewindableBodyError`` when the body has no ``seek``, when no
    integer ``_body_position`` was recorded, or when seeking fails with
    ``IOError`` / ``OSError``.
    """
    seek = getattr(prepared_request.body, 'seek', None)
    if seek is None or not isinstance(prepared_request._body_position, integer_types):
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")

    try:
        seek(prepared_request._body_position)
    except (IOError, OSError):
        raise UnrewindableBodyError("An error occurred when rewinding request "
                                    "body for redirect.")
|
|
@ -0,0 +1,86 @@
|
|||
"""
|
||||
urllib3 - Thread-safe connection pooling and re-using.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
import warnings
|
||||
|
||||
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
|
||||
|
||||
from . import exceptions
|
||||
from .filepost import encode_multipart_formdata
|
||||
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
|
||||
from .response import HTTPResponse
|
||||
from .util.request import make_headers
|
||||
from .util.url import get_host
|
||||
from .util.timeout import Timeout
|
||||
from .util.retry import Retry
|
||||
|
||||
|
||||
# Set default logging handler to avoid "No handler found" warnings.
|
||||
import logging
|
||||
from logging import NullHandler
|
||||
|
||||
__author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
|
||||
__license__ = "MIT"
|
||||
__version__ = "1.25.6"
|
||||
|
||||
__all__ = (
|
||||
"HTTPConnectionPool",
|
||||
"HTTPSConnectionPool",
|
||||
"PoolManager",
|
||||
"ProxyManager",
|
||||
"HTTPResponse",
|
||||
"Retry",
|
||||
"Timeout",
|
||||
"add_stderr_logger",
|
||||
"connection_from_url",
|
||||
"disable_warnings",
|
||||
"encode_multipart_formdata",
|
||||
"get_host",
|
||||
"make_headers",
|
||||
"proxy_from_url",
|
||||
)
|
||||
|
||||
logging.getLogger(__name__).addHandler(NullHandler())
|
||||
|
||||
|
||||
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a ``logging.StreamHandler`` to this package's logger and set the
    logger to *level*. Handy for debugging.

    :param level: logging level applied to the logger (default ``DEBUG``).
    :return: the handler that was added.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    )

    # This function has to live in this __init__.py so that __name__ is
    # correct even if urllib3 is vendored within another package.
    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stream_handler)
    package_logger.setLevel(level)
    package_logger.debug("Added a stderr logging handler to logger: %s", __name__)
    return stream_handler
|
||||
|
||||
|
||||
# ... Clean up.
|
||||
del NullHandler
|
||||
|
||||
|
||||
# All warning filters *must* be appended unless you're really certain that they
|
||||
# shouldn't be: otherwise, it's very hard for users to use most Python
|
||||
# mechanisms to silence them.
|
||||
# SecurityWarning's always go off by default.
|
||||
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
|
||||
# SubjectAltNameWarning's should go off once per host
|
||||
warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
|
||||
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
|
||||
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
|
||||
# SNIMissingWarnings should go off only once.
|
||||
warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
|
||||
|
||||
|
||||
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Install an "ignore" warnings filter for *category*.

    :param category: warning class to silence; defaults to
        ``exceptions.HTTPWarning``.
    """
    warnings.simplefilter(action="ignore", category=category)
|
|
@ -0,0 +1,336 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping, MutableMapping
|
||||
except ImportError:
|
||||
from collections import Mapping, MutableMapping
|
||||
try:
    from threading import RLock
except ImportError:  # Platform-specific: No threads available

    class RLock:
        """No-op context manager used in place of ``threading.RLock`` when
        that import fails."""

        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            # Returning None (falsy) lets any exception propagate.
            return None
|
||||
|
||||
|
||||
from collections import OrderedDict
|
||||
from .exceptions import InvalidHeader
|
||||
from .packages.six import iterkeys, itervalues, PY3
|
||||
|
||||
|
||||
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
|
||||
|
||||
|
||||
_Null = object()
|
||||
|
||||
|
||||
class RecentlyUsedContainer(MutableMapping):
    """
    Dict-like container that keeps at most ``maxsize`` keys and discards the
    least-recently-used ones beyond that. Every access to the underlying
    mapping is made while holding ``self.lock``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Optional callback. ``dispose_func(value)`` is called every time an
        item is evicted from, replaced in, or deleted from the container.
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func

        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        # Pop and re-insert so the key moves to the end of the eviction line.
        with self.lock:
            value = self._container.pop(key)
            self._container[key] = value
        return value

    def __setitem__(self, key, value):
        with self.lock:
            # A value already stored under 'key' is the first disposal candidate.
            displaced = self._container.get(key, _Null)
            self._container[key] = value

            # Over capacity: drop the least recently used entry from the
            # beginning of the container instead.
            if len(self._container) > self._maxsize:
                _unused_key, displaced = self._container.popitem(last=False)

        # The callback runs after the lock has been released.
        if self.dispose_func and displaced is not _Null:
            self.dispose_func(displaced)

    def __delitem__(self, key):
        with self.lock:
            removed = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(removed)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError(
            "Iteration over this class is unlikely to be threadsafe."
        )

    def clear(self):
        with self.lock:
            # Snapshot the values first, then wipe the mapping.
            snapshot = list(self._container.values())
            self._container.clear()

        if self.dispose_func:
            for removed in snapshot:
                self.dispose_func(removed)

    def keys(self):
        with self.lock:
            return list(self._container.keys())
|
||||
|
||||
|
||||
class HTTPHeaderDict(MutableMapping):
    """
    A ``dict``-like container for storing HTTP headers.

    :param headers:
        An iterable of field-value pairs. Must not contain multiple field names
        when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    Field names are stored and compared case-insensitively in compliance with
    RFC 7230. Iteration provides the first case-sensitive key seen for each
    case-insensitive pair.

    Using ``__setitem__`` syntax overwrites fields that compare equal
    case-insensitively in order to maintain ``dict``'s api. For fields that
    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
    in a loop.

    If multiple fields that are equal case-insensitively are passed to the
    constructor or ``.update``, the behavior is undefined and some will be
    lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'
    """

    # Internal layout: self._container maps the lower-cased field name to a
    # list ``[original_name, value1, value2, ...]``.

    def __init__(self, headers=None, **kwargs):
        super(HTTPHeaderDict, self).__init__()
        self._container = OrderedDict()
        if isinstance(headers, HTTPHeaderDict):
            self._copy_from(headers)
        elif headers is not None:
            self.extend(headers)
        if kwargs:
            self.extend(kwargs)

    def __setitem__(self, key, val):
        folded = key.lower()
        self._container[folded] = [key, val]
        return self._container[folded]

    def __getitem__(self, key):
        # Slot 0 holds the original name; everything after it is a value.
        return ", ".join(self._container[key.lower()][1:])

    def __delitem__(self, key):
        del self._container[key.lower()]

    def __contains__(self, key):
        return key.lower() in self._container

    def __eq__(self, other):
        if not (isinstance(other, Mapping) or hasattr(other, "keys")):
            return False
        if not isinstance(other, type(self)):
            other = type(self)(other)
        # Compare the merged views keyed by lower-cased field name.
        mine = {name.lower(): merged for name, merged in self.itermerged()}
        theirs = {name.lower(): merged for name, merged in other.itermerged()}
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    if not PY3:  # Python 2
        iterkeys = MutableMapping.iterkeys
        itervalues = MutableMapping.itervalues

    # Private sentinel meaning "no default supplied".
    __marker = object()

    def __len__(self):
        return len(self._container)

    def __iter__(self):
        # Only provide the originally cased names
        for entry in self._container.values():
            yield entry[0]

    def pop(self, key, default=__marker):
        """D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
          If key is not found, d is returned if given, otherwise KeyError is raised.
        """
        # Using the MutableMapping function directly fails due to the private marker.
        # Using ordinary dict.pop would expose the internal structures.
        # So let's reinvent the wheel.
        try:
            merged = self[key]
        except KeyError:
            if default is self.__marker:
                raise
            return default
        else:
            del self[key]
            return merged

    def discard(self, key):
        """Delete *key* if present; a missing key is silently ignored."""
        try:
            del self[key]
        except KeyError:
            pass

    def add(self, key, val):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        folded = key.lower()
        fresh_entry = [key, val]
        # Keep the common case aka no item present as fast as possible
        stored_entry = self._container.setdefault(folded, fresh_entry)
        if stored_entry is not fresh_entry:
            # An entry already existed: append to it, keeping its original name.
            stored_entry.append(val)

    def extend(self, *args, **kwargs):
        """Generic import function for any type of header-like object.
        Adapted version of MutableMapping.update in order to insert items
        with self.add instead of self.__setitem__
        """
        if len(args) > 1:
            raise TypeError(
                "extend() takes at most 1 positional "
                "arguments ({0} given)".format(len(args))
            )
        other = args[0] if args else ()

        if isinstance(other, HTTPHeaderDict):
            # iteritems() yields one pair per stored value.
            for name, value in other.iteritems():
                self.add(name, value)
        elif isinstance(other, Mapping):
            for name in other:
                self.add(name, other[name])
        elif hasattr(other, "keys"):
            for name in other.keys():
                self.add(name, other[name])
        else:
            for name, value in other:
                self.add(name, value)

        for name, value in kwargs.items():
            self.add(name, value)

    def getlist(self, key, default=__marker):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist."""
        try:
            entry = self._container[key.lower()]
        except KeyError:
            if default is self.__marker:
                return []
            return default
        else:
            return entry[1:]

    # Backwards compatibility for httplib
    getheaders = getlist
    getallmatchingheaders = getlist
    iget = getlist

    # Backwards compatibility for http.cookiejar
    get_all = getlist

    def __repr__(self):
        return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))

    def _copy_from(self, other):
        """Copy every entry of *other* into this container, replacing any
        entry stored under the same lower-cased name."""
        for name in other:
            values = other.getlist(name)
            if isinstance(values, list):
                # Don't need to convert tuples
                values = list(values)
            self._container[name.lower()] = [name] + values

    def copy(self):
        duplicate = type(self)()
        duplicate._copy_from(self)
        return duplicate

    def iteritems(self):
        """Iterate over all header lines, including duplicate ones."""
        for name in self:
            entry = self._container[name.lower()]
            for value in entry[1:]:
                yield entry[0], value

    def itermerged(self):
        """Iterate over all headers, merging duplicate ones together."""
        for name in self:
            entry = self._container[name.lower()]
            yield entry[0], ", ".join(entry[1:])

    def items(self):
        return list(self.iteritems())

    @classmethod
    def from_httplib(cls, message):  # Python 2
        """Read headers from a Python 2 httplib message object."""
        # python2.7 does not expose a proper API for exporting multiheaders
        # efficiently. This function re-reads raw lines from the message
        # object and extracts the multiheaders properly.
        obs_fold_continued_leaders = (" ", "\t")
        collected = []

        for line in message.headers:
            if line.startswith(obs_fold_continued_leaders):
                if not collected:
                    # We received a header line that starts with OWS as described
                    # in RFC-7230 S3.2.4. This indicates a multiline header, but
                    # there exists no previous header to which we can attach it.
                    raise InvalidHeader(
                        "Header continuation with no previous header: %s" % line
                    )
                # Fold the continuation line into the previous header's value.
                name, value = collected[-1]
                collected[-1] = (name, value + " " + line.strip())
                continue

            name, value = line.split(":", 1)
            collected.append((name, value.strip()))

        return cls(collected)
|
|
@ -0,0 +1,448 @@
|
|||
from __future__ import absolute_import
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
from socket import error as SocketError, timeout as SocketTimeout
|
||||
import warnings
|
||||
from .packages import six
|
||||
from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
|
||||
from .packages.six.moves.http_client import HTTPException # noqa: F401
|
||||
|
||||
try:  # Compiled with SSL?
    import ssl

    # Kept inside the ``try`` so a missing ``SSLError`` attribute also
    # triggers the fallback below.
    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    ssl = None

    class BaseSSLError(BaseException):
        """Stand-in exception type defined when ``ssl.SSLError`` is unavailable."""
|
||||
|
||||
|
||||
try:
    # Python 3: not a no-op. Rebinding the builtin as a module-level name
    # makes it importable from this module.
    ConnectionError = ConnectionError
except NameError:
    # Python 2: the builtin does not exist, so define a substitute.
    class ConnectionError(Exception):
        """Substitute defined when no builtin ``ConnectionError`` exists."""
|
||||
|
||||
|
||||
from .exceptions import (
|
||||
NewConnectionError,
|
||||
ConnectTimeoutError,
|
||||
SubjectAltNameWarning,
|
||||
SystemTimeWarning,
|
||||
)
|
||||
from .packages.ssl_match_hostname import match_hostname, CertificateError
|
||||
|
||||
from .util.ssl_ import (
|
||||
resolve_cert_reqs,
|
||||
resolve_ssl_version,
|
||||
assert_fingerprint,
|
||||
create_urllib3_context,
|
||||
ssl_wrap_socket,
|
||||
)
|
||||
|
||||
|
||||
from .util import connection
|
||||
|
||||
from ._collections import HTTPHeaderDict
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
port_by_scheme = {"http": 80, "https": 443}
|
||||
|
||||
# When it comes time to update this value as a part of regular maintenance
|
||||
# (ie test_recent_date is failing) update it to ~6 months before the current date.
|
||||
RECENT_DATE = datetime.date(2019, 1, 1)
|
||||
|
||||
|
||||
class DummyConnection(object):
    """Placeholder class; used to detect a failed ConnectionCls import."""
|
||||
|
||||
|
||||
class HTTPConnection(_HTTPConnection, object):
    """
    Based on httplib.HTTPConnection but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

      - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
      - ``source_address``: Set the source address for the current connection.
      - ``socket_options``: Set specific options on the underlying socket. If not specified, then
        defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
        Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

        For example, if you wish to enable TCP Keep Alive in addition to the defaults,
        you might pass::

            HTTPConnection.default_socket_options + [
                (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
            ]

        Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        # ``strict`` is only forwarded to the base class on Python 2.
        if not six.PY2:
            kw.pop("strict", None)

        # Pre-set source_address. It is read with ``get`` so it stays in
        # ``kw`` and is still passed to the base constructor.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self):
        """ Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises ConnectTimeoutError: when creating the connection times out.
        :raises NewConnectionError: on any other socket error.
        """
        # Only forward the optional settings that are actually set (truthy).
        options = {}
        for option_name in ("source_address", "socket_options"):
            option_value = getattr(self, option_name)
            if option_value:
                options[option_name] = option_value

        try:
            # Connect to the unstripped host name (see the ``host`` property).
            return connection.create_connection(
                (self._dns_host, self.port), self.timeout, **options
            )
        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )
        except SocketError as err:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % err
            )

    def _prepare_conn(self, conn):
        """Install *conn* as this connection's socket and, when a tunnel host
        is configured, set up the tunnel over it."""
        self.sock = conn
        # Google App Engine's httplib does not define _tunnel_host
        if getattr(self, "_tunnel_host", None):
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

    def connect(self):
        """Open a new socket connection and prepare it for use."""
        self._prepare_conn(self._new_conn())

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block
        """
        header_dict = HTTPHeaderDict({} if headers is None else headers)
        self.putrequest(
            method,
            url,
            skip_accept_encoding="accept-encoding" in header_dict,
            skip_host="host" in header_dict,
        )
        for name, value in header_dict.items():
            self.putheader(name, value)
        if "transfer-encoding" not in header_dict:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            # A single str/bytes body is sent as one chunk.
            if isinstance(body, six.string_types + (bytes,)):
                chunks = (body,)
            else:
                chunks = body
            for chunk in chunks:
                # Empty chunks are skipped.
                if not chunk:
                    continue
                if not isinstance(chunk, bytes):
                    chunk = chunk.encode("utf8")
                # Chunk size as lower-case hex digits without the "0x" prefix.
                size_line = "%x" % len(chunk)
                self.send(size_line.encode("utf-8"))
                self.send(b"\r\n")
                self.send(chunk)
                self.send(b"\r\n")

        # After the if clause, to always have a closed body
        self.send(b"0\r\n\r\n")
|
||||
|
||||
|
||||
class HTTPSConnection(HTTPConnection):
    """
    HTTPS connection that wraps its socket with ``ssl_wrap_socket`` using
    either a caller-supplied ``ssl_context`` or a context created on demand.

    ``connect`` does not pass CA certificates to ``ssl_wrap_socket`` and does
    not set ``is_verified``.
    """

    default_port = port_by_scheme["https"]

    ssl_version = None

    # ``connect`` reads these three attributes. Without class-level defaults
    # they exist only on subclasses that define them, so calling ``connect``
    # on this class raised AttributeError.
    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):

        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP.  (See Issue #356)
        self._protocol = "https"

    def connect(self):
        """Open a socket connection and wrap it with SSL/TLS."""
        conn = self._new_conn()
        self._prepare_conn(conn)

        # Create a context only when the caller did not supply one; remember
        # that we did, because OS default certs are loaded only into a context
        # created here.
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        context = self.ssl_context
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ssl_context=self.ssl_context,
            server_hostname=self.server_hostname,
        )
|
||||
|
||||
|
||||
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.
    """

    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ssl_version = None
    assert_fingerprint = None

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
    ):
        """
        This method should only be called once, before the connection is used.
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is None:
                cert_reqs = resolve_cert_reqs(None)
            else:
                cert_reqs = self.ssl_context.verify_mode

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        # Falsy paths are stored as given; others get ``~`` expanded.
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)

    def connect(self):
        """Open the socket, wrap it with SSL/TLS, check the peer certificate
        and record the outcome in ``is_verified``."""
        # Add certificate verification
        conn = self._new_conn()
        hostname = self.host

        # Google App Engine's httplib does not define _tunnel_host
        if getattr(self, "_tunnel_host", None):
            self.sock = conn
            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # An explicitly configured server_hostname wins over the derived one.
        if self.server_hostname is not None:
            server_hostname = self.server_hostname
        else:
            server_hostname = hostname

        # Warn when the system date is earlier than RECENT_DATE.
        if datetime.date.today() < RECENT_DATE:
            clock_message = (
                "System time is way off (before {0}). This will probably "
                "lead to SSL verification errors"
            ).format(RECENT_DATE)
            warnings.warn(clock_message, SystemTimeWarning)

        # Create a context only when the caller did not supply one.
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        no_ca_configured = not self.ca_certs and not self.ca_cert_dir
        if (
            no_ca_configured
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            server_hostname=server_hostname,
            ssl_context=context,
        )

        if self.assert_fingerprint:
            # When a fingerprint is set, the hostname check below is skipped.
            peer_der = self.sock.getpeercert(binary_form=True)
            assert_fingerprint(peer_der, self.assert_fingerprint)
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                san_message = (
                    "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                    "`commonName` for now. This feature is being removed by major browsers and "
                    "deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 "
                    "for details.)".format(hostname)
                )
                warnings.warn(san_message, SubjectAltNameWarning)
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )
|
||||
|
||||
|
||||
def _match_hostname(cert, asserted_hostname):
    """Check *cert* against *asserted_hostname* with ``match_hostname``.

    On a ``CertificateError`` the mismatch is logged, the certificate is
    attached to the exception as ``_peer_cert`` and the exception is re-raised.
    """
    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as mismatch:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Add cert to exception and reraise so client code can inspect
        # the cert when catching the exception, if they want to
        mismatch._peer_cert = cert
        raise
|
||||
|
||||
|
||||
if ssl:
|
||||
# Make a copy for testing.
|
||||
UnverifiedHTTPSConnection = HTTPSConnection
|
||||
HTTPSConnection = VerifiedHTTPSConnection
|
||||
else:
|
||||
HTTPSConnection = DummyConnection
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,32 @@
|
|||
"""
|
||||
This module provides means to detect the App Engine environment.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def is_appengine():
    """Return True when any of the local, production or production-MVM
    App Engine checks succeeds."""
    probes = (is_local_appengine, is_prod_appengine, is_prod_appengine_mvms)
    # any() stops at the first probe that succeeds.
    return any(probe() for probe in probes)
|
||||
|
||||
|
||||
def is_appengine_sandbox():
    """Return True when an App Engine environment is detected and it is not
    the production-MVM one."""
    if not is_appengine():
        return False
    return not is_prod_appengine_mvms()
|
||||
|
||||
|
||||
def is_local_appengine():
    """Return True when ``APPENGINE_RUNTIME`` is set and ``SERVER_SOFTWARE``
    contains ``"Development/"``.

    A missing ``SERVER_SOFTWARE`` variable is treated as an empty string, so
    the function returns False instead of raising ``KeyError``.
    """
    return (
        "APPENGINE_RUNTIME" in os.environ
        and "Development/" in os.environ.get("SERVER_SOFTWARE", "")
    )
|
||||
|
||||
|
||||
def is_prod_appengine():
    """Return True when ``APPENGINE_RUNTIME`` is set, ``SERVER_SOFTWARE``
    contains ``"Google App Engine/"`` and the production-MVM check fails.

    A missing ``SERVER_SOFTWARE`` variable is treated as an empty string, so
    the function returns False instead of raising ``KeyError``.
    """
    return (
        "APPENGINE_RUNTIME" in os.environ
        and "Google App Engine/" in os.environ.get("SERVER_SOFTWARE", "")
        and not is_prod_appengine_mvms()
    )
|
||||
|
||||
|
||||
def is_prod_appengine_mvms():
    """Return True when the ``GAE_VM`` environment variable equals ``"true"``."""
    gae_vm = os.environ.get("GAE_VM")
    return gae_vm == "true"
|
|
@ -0,0 +1,492 @@
|
|||
"""
|
||||
This module uses ctypes to bind a whole bunch of functions and constants from
|
||||
SecureTransport. The goal here is to provide the low-level API to
|
||||
SecureTransport. These are essentially the C-level functions and constants, and
|
||||
they're pretty gross to work with.
|
||||
|
||||
This code is a bastardised version of the code found in Will Bond's oscrypto
|
||||
library. An enormous debt is owed to him for blazing this trail for us. For
|
||||
that reason, this code should be considered to be covered both by urllib3's
|
||||
license and by oscrypto's:
|
||||
|
||||
Copyright (c) 2015-2016 Will Bond <will@wbond.net>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import platform
|
||||
from ctypes.util import find_library
|
||||
from ctypes import (
|
||||
c_void_p,
|
||||
c_int32,
|
||||
c_char_p,
|
||||
c_size_t,
|
||||
c_byte,
|
||||
c_uint32,
|
||||
c_ulong,
|
||||
c_long,
|
||||
c_bool,
|
||||
)
|
||||
from ctypes import CDLL, POINTER, CFUNCTYPE
|
||||
|
||||
|
||||
# SecureTransport is only available on macOS. Checking the platform first
# keeps the failure an ImportError, consistent with the library checks below,
# instead of a ValueError from parsing an empty version string on other
# systems (platform.mac_ver() returns empty strings when it cannot determine
# the version).
if platform.system() != "Darwin":
    raise ImportError("Only macOS is supported")

# Locate the two system frameworks these bindings are built on.
security_path = find_library("Security")
if not security_path:
    raise ImportError("The library Security could not be found")

core_foundation_path = find_library("CoreFoundation")
if not core_foundation_path:
    raise ImportError("The library CoreFoundation could not be found")

# Refuse to load on releases older than OS X 10.8.
version = platform.mac_ver()[0]
version_info = tuple(map(int, version.split(".")))
if version_info < (10, 8):
    raise OSError(
        "Only OS X 10.8 and newer are supported, not %s.%s"
        % (version_info[0], version_info[1])
    )
|
||||
|
||||
# Handles for the two frameworks located above.
Security = CDLL(security_path, use_errno=True)
CoreFoundation = CDLL(core_foundation_path, use_errno=True)

# ---------------------------------------------------------------------------
# CoreFoundation value types. Most CF objects are handled as opaque pointers.
# ---------------------------------------------------------------------------
Boolean = c_bool
CFIndex = c_long
CFStringEncoding = c_uint32
CFData = c_void_p
CFString = c_void_p
CFArray = c_void_p
CFMutableArray = c_void_p
CFDictionary = c_void_p
CFError = c_void_p
CFType = c_void_p
CFTypeID = c_ulong

CFTypeRef = POINTER(CFType)
CFAllocatorRef = c_void_p

OSStatus = c_int32

# Pointer ("Ref") flavours of the CoreFoundation types above.
CFDataRef = POINTER(CFData)
CFStringRef = POINTER(CFString)
CFArrayRef = POINTER(CFArray)
CFMutableArrayRef = POINTER(CFMutableArray)
CFDictionaryRef = POINTER(CFDictionary)
CFArrayCallBacks = c_void_p
CFDictionaryKeyCallBacks = c_void_p
CFDictionaryValueCallBacks = c_void_p

# ---------------------------------------------------------------------------
# Security / SecureTransport types.
# ---------------------------------------------------------------------------
SecCertificateRef = POINTER(c_void_p)
SecExternalFormat = c_uint32
SecExternalItemType = c_uint32
SecIdentityRef = POINTER(c_void_p)
SecItemImportExportFlags = c_uint32
SecItemImportExportKeyParameters = c_void_p
SecKeychainRef = POINTER(c_void_p)
SSLProtocol = c_uint32
SSLCipherSuite = c_uint32
SSLContextRef = POINTER(c_void_p)
SecTrustRef = POINTER(c_void_p)
SSLConnectionRef = c_uint32
SecTrustResultType = c_uint32
SecTrustOptionFlags = c_uint32
SSLProtocolSide = c_uint32
SSLConnectionType = c_uint32
SSLSessionOption = c_uint32
|
||||
|
||||
|
||||
# Declare the argument and return types of every framework function used by
# the SecureTransport support code. A symbol missing from the loaded libraries
# surfaces as AttributeError, which is reported as an ImportError so the
# module simply looks unavailable to importers.
try:
    # ----------------------------------------------------------------- #
    # Security framework: certificates, identities, keychains
    # ----------------------------------------------------------------- #
    Security.SecItemImport.argtypes = [
        CFDataRef,
        CFStringRef,
        POINTER(SecExternalFormat),
        POINTER(SecExternalItemType),
        SecItemImportExportFlags,
        POINTER(SecItemImportExportKeyParameters),
        SecKeychainRef,
        POINTER(CFArrayRef),
    ]
    Security.SecItemImport.restype = OSStatus

    Security.SecCertificateGetTypeID.argtypes = []
    Security.SecCertificateGetTypeID.restype = CFTypeID

    Security.SecIdentityGetTypeID.argtypes = []
    Security.SecIdentityGetTypeID.restype = CFTypeID

    Security.SecKeyGetTypeID.argtypes = []
    Security.SecKeyGetTypeID.restype = CFTypeID

    Security.SecCertificateCreateWithData.argtypes = [CFAllocatorRef, CFDataRef]
    Security.SecCertificateCreateWithData.restype = SecCertificateRef

    Security.SecCertificateCopyData.argtypes = [SecCertificateRef]
    Security.SecCertificateCopyData.restype = CFDataRef

    # NOTE: the original declared this function a second time further down
    # with identical types; the redundant copy has been dropped.
    Security.SecCopyErrorMessageString.argtypes = [OSStatus, c_void_p]
    Security.SecCopyErrorMessageString.restype = CFStringRef

    Security.SecIdentityCreateWithCertificate.argtypes = [
        CFTypeRef,
        SecCertificateRef,
        POINTER(SecIdentityRef),
    ]
    Security.SecIdentityCreateWithCertificate.restype = OSStatus

    Security.SecKeychainCreate.argtypes = [
        c_char_p,
        c_uint32,
        c_void_p,
        Boolean,
        c_void_p,
        POINTER(SecKeychainRef),
    ]
    Security.SecKeychainCreate.restype = OSStatus

    Security.SecKeychainDelete.argtypes = [SecKeychainRef]
    Security.SecKeychainDelete.restype = OSStatus

    Security.SecPKCS12Import.argtypes = [
        CFDataRef,
        CFDictionaryRef,
        POINTER(CFArrayRef),
    ]
    Security.SecPKCS12Import.restype = OSStatus

    # ----------------------------------------------------------------- #
    # Security framework: SecureTransport (SSL*) functions
    # ----------------------------------------------------------------- #
    # Callback prototypes for the read/write functions given to SSLSetIOFuncs.
    SSLReadFunc = CFUNCTYPE(OSStatus, SSLConnectionRef, c_void_p, POINTER(c_size_t))
    SSLWriteFunc = CFUNCTYPE(
        OSStatus, SSLConnectionRef, POINTER(c_byte), POINTER(c_size_t)
    )

    Security.SSLSetIOFuncs.argtypes = [SSLContextRef, SSLReadFunc, SSLWriteFunc]
    Security.SSLSetIOFuncs.restype = OSStatus

    Security.SSLSetPeerID.argtypes = [SSLContextRef, c_char_p, c_size_t]
    Security.SSLSetPeerID.restype = OSStatus

    Security.SSLSetCertificate.argtypes = [SSLContextRef, CFArrayRef]
    Security.SSLSetCertificate.restype = OSStatus

    Security.SSLSetCertificateAuthorities.argtypes = [SSLContextRef, CFTypeRef, Boolean]
    Security.SSLSetCertificateAuthorities.restype = OSStatus

    Security.SSLSetConnection.argtypes = [SSLContextRef, SSLConnectionRef]
    Security.SSLSetConnection.restype = OSStatus

    Security.SSLSetPeerDomainName.argtypes = [SSLContextRef, c_char_p, c_size_t]
    Security.SSLSetPeerDomainName.restype = OSStatus

    Security.SSLHandshake.argtypes = [SSLContextRef]
    Security.SSLHandshake.restype = OSStatus

    Security.SSLRead.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
    Security.SSLRead.restype = OSStatus

    Security.SSLWrite.argtypes = [SSLContextRef, c_char_p, c_size_t, POINTER(c_size_t)]
    Security.SSLWrite.restype = OSStatus

    Security.SSLClose.argtypes = [SSLContextRef]
    Security.SSLClose.restype = OSStatus

    Security.SSLGetNumberSupportedCiphers.argtypes = [SSLContextRef, POINTER(c_size_t)]
    Security.SSLGetNumberSupportedCiphers.restype = OSStatus

    Security.SSLGetSupportedCiphers.argtypes = [
        SSLContextRef,
        POINTER(SSLCipherSuite),
        POINTER(c_size_t),
    ]
    Security.SSLGetSupportedCiphers.restype = OSStatus

    Security.SSLSetEnabledCiphers.argtypes = [
        SSLContextRef,
        POINTER(SSLCipherSuite),
        c_size_t,
    ]
    Security.SSLSetEnabledCiphers.restype = OSStatus

    # Fixed: this was spelled ``argtype``, which ctypes silently ignores, so
    # the signature was never applied.
    Security.SSLGetNumberEnabledCiphers.argtypes = [SSLContextRef, POINTER(c_size_t)]
    Security.SSLGetNumberEnabledCiphers.restype = OSStatus

    Security.SSLGetEnabledCiphers.argtypes = [
        SSLContextRef,
        POINTER(SSLCipherSuite),
        POINTER(c_size_t),
    ]
    Security.SSLGetEnabledCiphers.restype = OSStatus

    Security.SSLGetNegotiatedCipher.argtypes = [SSLContextRef, POINTER(SSLCipherSuite)]
    Security.SSLGetNegotiatedCipher.restype = OSStatus

    Security.SSLGetNegotiatedProtocolVersion.argtypes = [
        SSLContextRef,
        POINTER(SSLProtocol),
    ]
    Security.SSLGetNegotiatedProtocolVersion.restype = OSStatus

    Security.SSLCopyPeerTrust.argtypes = [SSLContextRef, POINTER(SecTrustRef)]
    Security.SSLCopyPeerTrust.restype = OSStatus

    Security.SecTrustSetAnchorCertificates.argtypes = [SecTrustRef, CFArrayRef]
    Security.SecTrustSetAnchorCertificates.restype = OSStatus

    # Fixed: this was spelled ``argstypes``, so the signature was never
    # applied.
    Security.SecTrustSetAnchorCertificatesOnly.argtypes = [SecTrustRef, Boolean]
    Security.SecTrustSetAnchorCertificatesOnly.restype = OSStatus

    Security.SecTrustEvaluate.argtypes = [SecTrustRef, POINTER(SecTrustResultType)]
    Security.SecTrustEvaluate.restype = OSStatus

    Security.SecTrustGetCertificateCount.argtypes = [SecTrustRef]
    Security.SecTrustGetCertificateCount.restype = CFIndex

    Security.SecTrustGetCertificateAtIndex.argtypes = [SecTrustRef, CFIndex]
    Security.SecTrustGetCertificateAtIndex.restype = SecCertificateRef

    Security.SSLCreateContext.argtypes = [
        CFAllocatorRef,
        SSLProtocolSide,
        SSLConnectionType,
    ]
    Security.SSLCreateContext.restype = SSLContextRef

    Security.SSLSetSessionOption.argtypes = [SSLContextRef, SSLSessionOption, Boolean]
    Security.SSLSetSessionOption.restype = OSStatus

    Security.SSLSetProtocolVersionMin.argtypes = [SSLContextRef, SSLProtocol]
    Security.SSLSetProtocolVersionMin.restype = OSStatus

    Security.SSLSetProtocolVersionMax.argtypes = [SSLContextRef, SSLProtocol]
    Security.SSLSetProtocolVersionMax.restype = OSStatus

    # Re-export the ctypes types on the library handle so users of
    # ``Security`` can reach them without importing each name separately.
    Security.SSLReadFunc = SSLReadFunc
    Security.SSLWriteFunc = SSLWriteFunc
    Security.SSLContextRef = SSLContextRef
    Security.SSLProtocol = SSLProtocol
    Security.SSLCipherSuite = SSLCipherSuite
    Security.SecIdentityRef = SecIdentityRef
    Security.SecKeychainRef = SecKeychainRef
    Security.SecTrustRef = SecTrustRef
    Security.SecTrustResultType = SecTrustResultType
    Security.SecExternalFormat = SecExternalFormat
    Security.OSStatus = OSStatus

    # CFString constants exported by the Security framework.
    Security.kSecImportExportPassphrase = CFStringRef.in_dll(
        Security, "kSecImportExportPassphrase"
    )
    Security.kSecImportItemIdentity = CFStringRef.in_dll(
        Security, "kSecImportItemIdentity"
    )

    # ----------------------------------------------------------------- #
    # CoreFoundation time!
    # ----------------------------------------------------------------- #
    CoreFoundation.CFRetain.argtypes = [CFTypeRef]
    CoreFoundation.CFRetain.restype = CFTypeRef

    CoreFoundation.CFRelease.argtypes = [CFTypeRef]
    CoreFoundation.CFRelease.restype = None

    CoreFoundation.CFGetTypeID.argtypes = [CFTypeRef]
    CoreFoundation.CFGetTypeID.restype = CFTypeID

    CoreFoundation.CFStringCreateWithCString.argtypes = [
        CFAllocatorRef,
        c_char_p,
        CFStringEncoding,
    ]
    CoreFoundation.CFStringCreateWithCString.restype = CFStringRef

    CoreFoundation.CFStringGetCStringPtr.argtypes = [CFStringRef, CFStringEncoding]
    CoreFoundation.CFStringGetCStringPtr.restype = c_char_p

    CoreFoundation.CFStringGetCString.argtypes = [
        CFStringRef,
        c_char_p,
        CFIndex,
        CFStringEncoding,
    ]
    CoreFoundation.CFStringGetCString.restype = c_bool

    CoreFoundation.CFDataCreate.argtypes = [CFAllocatorRef, c_char_p, CFIndex]
    CoreFoundation.CFDataCreate.restype = CFDataRef

    CoreFoundation.CFDataGetLength.argtypes = [CFDataRef]
    CoreFoundation.CFDataGetLength.restype = CFIndex

    CoreFoundation.CFDataGetBytePtr.argtypes = [CFDataRef]
    CoreFoundation.CFDataGetBytePtr.restype = c_void_p

    CoreFoundation.CFDictionaryCreate.argtypes = [
        CFAllocatorRef,
        POINTER(CFTypeRef),
        POINTER(CFTypeRef),
        CFIndex,
        CFDictionaryKeyCallBacks,
        CFDictionaryValueCallBacks,
    ]
    CoreFoundation.CFDictionaryCreate.restype = CFDictionaryRef

    CoreFoundation.CFDictionaryGetValue.argtypes = [CFDictionaryRef, CFTypeRef]
    CoreFoundation.CFDictionaryGetValue.restype = CFTypeRef

    CoreFoundation.CFArrayCreate.argtypes = [
        CFAllocatorRef,
        POINTER(CFTypeRef),
        CFIndex,
        CFArrayCallBacks,
    ]
    CoreFoundation.CFArrayCreate.restype = CFArrayRef

    CoreFoundation.CFArrayCreateMutable.argtypes = [
        CFAllocatorRef,
        CFIndex,
        CFArrayCallBacks,
    ]
    CoreFoundation.CFArrayCreateMutable.restype = CFMutableArrayRef

    CoreFoundation.CFArrayAppendValue.argtypes = [CFMutableArrayRef, c_void_p]
    CoreFoundation.CFArrayAppendValue.restype = None

    CoreFoundation.CFArrayGetCount.argtypes = [CFArrayRef]
    CoreFoundation.CFArrayGetCount.restype = CFIndex

    CoreFoundation.CFArrayGetValueAtIndex.argtypes = [CFArrayRef, CFIndex]
    CoreFoundation.CFArrayGetValueAtIndex.restype = c_void_p

    # Data symbols exported by CoreFoundation.
    CoreFoundation.kCFAllocatorDefault = CFAllocatorRef.in_dll(
        CoreFoundation, "kCFAllocatorDefault"
    )
    CoreFoundation.kCFTypeArrayCallBacks = c_void_p.in_dll(
        CoreFoundation, "kCFTypeArrayCallBacks"
    )
    CoreFoundation.kCFTypeDictionaryKeyCallBacks = c_void_p.in_dll(
        CoreFoundation, "kCFTypeDictionaryKeyCallBacks"
    )
    CoreFoundation.kCFTypeDictionaryValueCallBacks = c_void_p.in_dll(
        CoreFoundation, "kCFTypeDictionaryValueCallBacks"
    )

    # Re-export the CoreFoundation reference types on the library handle.
    CoreFoundation.CFTypeRef = CFTypeRef
    CoreFoundation.CFArrayRef = CFArrayRef
    CoreFoundation.CFStringRef = CFStringRef
    CoreFoundation.CFDictionaryRef = CFDictionaryRef

except AttributeError:
    raise ImportError("Error initializing ctypes")
|
||||
|
||||
|
||||
class CFConst(object):
    """
    Namespace-style class holding the CoreFoundation constants used by the
    SecureTransport code. It is not meant to be instantiated.
    """

    # Encoding identifier passed to the CFString functions for UTF-8 text.
    kCFStringEncodingUTF8 = CFStringEncoding(0x08000100)
|
||||
|
||||
|
||||
class SecurityConst(object):
    """
    Namespace-style class holding the Security framework constants used by
    the SecureTransport code. It is not meant to be instantiated.
    """

    # --- Session options ---------------------------------------------------
    kSSLSessionOptionBreakOnServerAuth = 0

    # --- Protocol versions -------------------------------------------------
    kSSLProtocol2 = 1
    kSSLProtocol3 = 2
    kTLSProtocol1 = 4
    kTLSProtocol11 = 7
    kTLSProtocol12 = 8
    kTLSProtocol13 = 10
    kTLSProtocolMaxSupported = 999

    # --- Context creation --------------------------------------------------
    kSSLClientSide = 1
    kSSLStreamType = 0

    # --- Import formats ----------------------------------------------------
    kSecFormatPEMSequence = 10

    # --- Trust evaluation results ------------------------------------------
    kSecTrustResultInvalid = 0
    kSecTrustResultProceed = 1
    # Value 2 is skipped deliberately: it was kSecTrustResultConfirm, which
    # is deprecated.
    kSecTrustResultDeny = 3
    kSecTrustResultUnspecified = 4
    kSecTrustResultRecoverableTrustFailure = 5
    kSecTrustResultFatalTrustFailure = 6
    kSecTrustResultOtherError = 7

    # --- SecureTransport status codes: connection state --------------------
    errSSLProtocol = -9800
    errSSLWouldBlock = -9803
    errSSLClosedGraceful = -9805
    errSSLClosedNoNotify = -9816
    errSSLClosedAbort = -9806

    # --- SecureTransport status codes: handshake / certificate failures ----
    errSSLXCertChainInvalid = -9807
    errSSLCrypto = -9809
    errSSLInternal = -9810
    errSSLCertExpired = -9814
    errSSLCertNotYetValid = -9815
    errSSLUnknownRootCert = -9812
    errSSLNoRootCert = -9813
    errSSLHostNameMismatch = -9843
    errSSLPeerHandshakeFail = -9824
    errSSLPeerUserCancelled = -9839
    errSSLWeakPeerEphemeralDHKey = -9850
    errSSLServerAuthCompleted = -9841
    errSSLRecordOverflow = -9847

    # --- Security framework status codes -----------------------------------
    errSecVerifyFailed = -67808
    errSecNoTrustSettings = -25263
    errSecItemNotFound = -25300
    errSecInvalidTrustSettings = -25262

    # --- Cipher suites -----------------------------------------------------
    # Only the suites allowed by our default cipher string are listed.
    # Source: https://developer.apple.com/documentation/security/1550981-ssl_cipher_suite_values
    TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 = 0xC02C
    TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 = 0xC030
    TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B
    TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 = 0xC02F
    TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA9
    TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 = 0xCCA8
    TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 = 0x009F
    TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 = 0x009E
    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 = 0xC024
    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 = 0xC028
    TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA = 0xC00A
    TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA = 0xC014
    TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 = 0x006B
    TLS_DHE_RSA_WITH_AES_256_CBC_SHA = 0x0039
    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 = 0xC023
    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 = 0xC027
    TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA = 0xC009
    TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA = 0xC013
    TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 = 0x0067
    TLS_DHE_RSA_WITH_AES_128_CBC_SHA = 0x0033
    TLS_RSA_WITH_AES_256_GCM_SHA384 = 0x009D
    TLS_RSA_WITH_AES_128_GCM_SHA256 = 0x009C
    TLS_RSA_WITH_AES_256_CBC_SHA256 = 0x003D
    TLS_RSA_WITH_AES_128_CBC_SHA256 = 0x003C
    TLS_RSA_WITH_AES_256_CBC_SHA = 0x0035
    TLS_RSA_WITH_AES_128_CBC_SHA = 0x002F
    TLS_AES_128_GCM_SHA256 = 0x1301
    TLS_AES_256_GCM_SHA384 = 0x1302
    TLS_AES_128_CCM_8_SHA256 = 0x1305
    TLS_AES_128_CCM_SHA256 = 0x1304
|
|
@ -0,0 +1,328 @@
|
|||
"""
|
||||
Low-level helpers for the SecureTransport bindings.
|
||||
|
||||
These are Python functions that are not directly related to the high-level APIs
|
||||
but are necessary to get them to work. They include a whole bunch of low-level
|
||||
CoreFoundation messing about and memory management. The concerns in this module
|
||||
are almost entirely about trying to avoid memory leaks and providing
|
||||
appropriate and useful assistance to the higher-level code.
|
||||
"""
|
||||
import base64
|
||||
import ctypes
|
||||
import itertools
|
||||
import re
|
||||
import os
|
||||
import ssl
|
||||
import tempfile
|
||||
|
||||
from .bindings import Security, CoreFoundation, CFConst
|
||||
|
||||
|
||||
# Matches every certificate in a PEM bundle. Group 1 captures the base64
# payload between the BEGIN/END markers; DOTALL lets it span several lines.
_PEM_CERTS_RE = re.compile(
    b"-----BEGIN CERTIFICATE-----\n(.*?)\n-----END CERTIFICATE-----",
    flags=re.DOTALL,
)
|
||||
|
||||
|
||||
def _cf_data_from_bytes(bytestring):
    """
    Wrap ``bytestring`` in a newly created CFData object.

    The caller owns the returned CFData and must CFRelease it.
    """
    allocator = CoreFoundation.kCFAllocatorDefault
    length = len(bytestring)
    return CoreFoundation.CFDataCreate(allocator, bytestring, length)
|
||||
|
||||
|
||||
def _cf_dictionary_from_tuples(tuples):
    """
    Build a CFDictionary from a list of ``(key, value)`` Python tuples.

    :param tuples: sequence of 2-tuples whose items can be stored in a
        ``CFTypeRef`` array.
    :returns: the result of ``CFDictionaryCreate``.
    """
    entry_count = len(tuples)
    ref_array_type = CoreFoundation.CFTypeRef * entry_count

    # Keys and values are extracted in the same order so that index i of one
    # array pairs with index i of the other.
    key_array = ref_array_type(*[pair[0] for pair in tuples])
    value_array = ref_array_type(*[pair[1] for pair in tuples])

    return CoreFoundation.CFDictionaryCreate(
        CoreFoundation.kCFAllocatorDefault,
        key_array,
        value_array,
        entry_count,
        CoreFoundation.kCFTypeDictionaryKeyCallBacks,
        CoreFoundation.kCFTypeDictionaryValueCallBacks,
    )
|
||||
|
||||
|
||||
def _cf_string_to_unicode(value):
    """
    Turn a CFString object into a Python unicode string. Only used when
    building error messages.

    ``CFStringGetCStringPtr`` is tried first. When it yields ``None``, the
    text is copied into a 1024-byte scratch buffer with
    ``CFStringGetCString`` instead.

    :raises OSError: if the fallback copy reports failure.
    """
    cf_string = ctypes.cast(value, ctypes.POINTER(ctypes.c_void_p))
    utf8 = CFConst.kCFStringEncodingUTF8

    raw = CoreFoundation.CFStringGetCStringPtr(cf_string, utf8)
    if raw is None:
        # No direct pointer available: fall back to copying the characters.
        scratch = ctypes.create_string_buffer(1024)
        copied = CoreFoundation.CFStringGetCString(cf_string, scratch, 1024, utf8)
        if not copied:
            raise OSError("Error copying C string from CFStringRef")
        raw = scratch.value

    if raw is None:
        return raw
    return raw.decode("utf-8")
|
||||
|
||||
|
||||
def _assert_no_error(error, exception_class=None):
    """
    Raise an exception describing ``error`` unless it is 0.

    :param error: status code returned by a Security framework call.
    :param exception_class: exception type to raise; ``ssl.SSLError`` is used
        when this is ``None``.
    """
    if error == 0:
        return

    # Ask the framework for a readable message, then drop our reference.
    message_ref = Security.SecCopyErrorMessageString(error, None)
    message = _cf_string_to_unicode(message_ref)
    CoreFoundation.CFRelease(message_ref)

    # Fall back to the numeric status when no message text is available.
    if not message:
        message = u"OSStatus %s" % error

    error_type = ssl.SSLError if exception_class is None else exception_class
    raise error_type(message)
|
||||
|
||||
|
||||
def _cert_array_from_pem(pem_bundle):
    """
    Given a bundle of certs in PEM format, turns them into a CFArray of certs
    that can be used to validate a cert chain.

    :param pem_bundle: bytes containing one or more PEM-encoded certificates.
    :returns: a mutable CFArray of certificate objects. On success the caller
        is responsible for releasing it.
    :raises ssl.SSLError: if the bundle contains no certificates, if memory
        cannot be allocated, or if a certificate object cannot be built.
    """
    # Normalize the PEM bundle's line endings.
    pem_bundle = pem_bundle.replace(b"\r\n", b"\n")

    der_certs = [
        base64.b64decode(match.group(1)) for match in _PEM_CERTS_RE.finditer(pem_bundle)
    ]
    if not der_certs:
        raise ssl.SSLError("No root certificates specified")

    cert_array = CoreFoundation.CFArrayCreateMutable(
        CoreFoundation.kCFAllocatorDefault,
        0,
        ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
    )
    if not cert_array:
        raise ssl.SSLError("Unable to allocate memory!")

    try:
        for der_bytes in der_certs:
            certdata = _cf_data_from_bytes(der_bytes)
            if not certdata:
                raise ssl.SSLError("Unable to allocate memory!")
            cert = Security.SecCertificateCreateWithData(
                CoreFoundation.kCFAllocatorDefault, certdata
            )
            # The certificate has been built (or has failed to build); the
            # intermediate CFData is no longer needed either way.
            CoreFoundation.CFRelease(certdata)
            if not cert:
                raise ssl.SSLError("Unable to build cert object!")

            # The array holds its own reference, so ours can be dropped.
            CoreFoundation.CFArrayAppendValue(cert_array, cert)
            CoreFoundation.CFRelease(cert)
    except Exception:
        # We need to free the array before the exception bubbles further.
        # We only want to do that if an error occurs: otherwise, the caller
        # should free.
        CoreFoundation.CFRelease(cert_array)
        # Re-raise: without this the error was swallowed and the array that
        # was just released was handed back to the caller.
        raise

    return cert_array
|
||||
|
||||
|
||||
def _is_cert(item):
    """
    Report whether the CFTypeRef ``item`` has the certificate type ID.
    """
    return Security.SecCertificateGetTypeID() == CoreFoundation.CFGetTypeID(item)
|
||||
|
||||
|
||||
def _is_identity(item):
    """
    Report whether the CFTypeRef ``item`` has the identity type ID.
    """
    return Security.SecIdentityGetTypeID() == CoreFoundation.CFGetTypeID(item)
|
||||
|
||||
|
||||
def _temporary_keychain():
    """
    This function creates a temporary Mac keychain that we can use to work with
    credentials. This keychain uses a one-time password and a temporary file to
    store the data. We expect to have one keychain per socket. The returned
    SecKeychainRef must be freed by the caller, including calling
    SecKeychainDelete.

    Returns a tuple of the SecKeychainRef and the path to the temporary
    directory that contains it.

    If the keychain cannot be created, the temporary directory is removed
    again before the error propagates.
    """
    import shutil  # only needed for cleanup on the failure path

    # Unfortunately, SecKeychainCreate requires a path to a keychain. This
    # means we cannot use mkstemp to use a generic temporary file. Instead,
    # we're going to create a temporary directory and a filename to use there.
    # This filename will be 8 random bytes expanded into base16 (hex). We also
    # need some random bytes to password-protect the keychain we're creating,
    # so we ask for 40 random bytes in total.
    random_bytes = os.urandom(40)
    filename = base64.b16encode(random_bytes[:8]).decode("utf-8")
    password = base64.b16encode(random_bytes[8:])  # Must be valid UTF-8
    tempdirectory = tempfile.mkdtemp()

    keychain_path = os.path.join(tempdirectory, filename).encode("utf-8")

    # We now want to create the keychain itself.
    keychain = Security.SecKeychainRef()
    try:
        status = Security.SecKeychainCreate(
            keychain_path, len(password), password, False, None, ctypes.byref(keychain)
        )
        _assert_no_error(status)
    except Exception:
        # On failure the caller never learns the directory path, so nothing
        # else could clean it up.
        shutil.rmtree(tempdirectory, ignore_errors=True)
        raise

    # Having created the keychain, we want to pass it off to the caller.
    return keychain, tempdirectory
|
||||
|
||||
|
||||
def _load_items_from_file(keychain, path):
    """
    Given a single file, loads all the trust objects from it into arrays and
    the keychain.

    :param keychain: the keychain that SecItemImport inserts into.
    :param path: path of the file to read (opened in binary mode).
    :returns: a tuple of lists: the first list is a list of identities, the
        second a list of certs. Every returned item has been CFRetained.
    :raises: whatever ``_assert_no_error`` raises when the import fails.
    """
    certificates = []
    identities = []
    result_array = None
    # Bound before the try block so the finally clause can always inspect it,
    # even if CFDataCreate itself raises.
    filedata = None

    with open(path, "rb") as f:
        raw_filedata = f.read()

    try:
        filedata = CoreFoundation.CFDataCreate(
            CoreFoundation.kCFAllocatorDefault, raw_filedata, len(raw_filedata)
        )
        result_array = CoreFoundation.CFArrayRef()
        result = Security.SecItemImport(
            filedata,  # cert data
            None,  # Filename, leaving it out for now
            None,  # What the type of the file is, we don't care
            None,  # what's in the file, we don't care
            0,  # import flags
            None,  # key params, can include passphrase in the future
            keychain,  # The keychain to insert into
            ctypes.byref(result_array),  # Results
        )
        _assert_no_error(result)

        # A CFArray is not very useful to us as an intermediary
        # representation, so we are going to extract the objects we want
        # and then free the array. We don't need to keep hold of keys: the
        # keychain already has them!
        result_count = CoreFoundation.CFArrayGetCount(result_array)
        for index in range(result_count):
            item = CoreFoundation.CFArrayGetValueAtIndex(result_array, index)
            item = ctypes.cast(item, CoreFoundation.CFTypeRef)

            if _is_cert(item):
                CoreFoundation.CFRetain(item)
                certificates.append(item)
            elif _is_identity(item):
                CoreFoundation.CFRetain(item)
                identities.append(item)
    finally:
        if result_array:
            CoreFoundation.CFRelease(result_array)

        # Only release what was actually created: the pointer may be unset
        # (creation raised) or NULL (creation failed).
        if filedata:
            CoreFoundation.CFRelease(filedata)

    return (identities, certificates)
|
||||
|
||||
|
||||
def _load_client_cert_chain(keychain, *paths):
    """
    Load certificates (and possibly keys) from the given files and build the
    client certificate trust chain from them.

    :param keychain: keychain passed to ``_load_items_from_file`` and used to
        look up an identity when none was loaded directly.
    :param paths: file paths to load, in order; falsy entries are skipped.
    :returns: a CFArray holding one SecIdentityRef followed by zero or more
        SecCertificateRef objects. The caller is responsible for freeing it.
    """
    # The strategy, in short:
    #
    # macOS will not give you a SecIdentityRef unless you have imported a key
    # into a keychain. This is a somewhat artificial limitation of macOS (for
    # example, it doesn't necessarily affect iOS), but nothing inside
    # Security.framework lets you get a SecIdentityRef without having a key
    # in a keychain.
    #
    # So every file is imported in order with SecItemImport, pointing at a
    # keychain macOS can use to work with the private key. Afterwards, if an
    # identity was loaded, it is used directly. Otherwise the first
    # certificate (assumed to be the leaf) is used to ask the keychain for a
    # SecIdentityRef with that cert's associated key.
    #
    # The returned CFArray must remain alive for the entire connection, so in
    # practice it is stored with a single SSLSocket along with the reference
    # to the keychain.
    certificates = []
    identities = []

    try:
        for file_path in paths:
            # Filter out bad paths.
            if not file_path:
                continue
            loaded_identities, loaded_certs = _load_items_from_file(
                keychain, file_path
            )
            identities.extend(loaded_identities)
            certificates.extend(loaded_certs)

        # Everything is loaded. Without an identity, derive one from the
        # first certificate.
        if not identities:
            derived_identity = Security.SecIdentityRef()
            status = Security.SecIdentityCreateWithCertificate(
                keychain, certificates[0], ctypes.byref(derived_identity)
            )
            _assert_no_error(status)
            identities.append(derived_identity)

            # The original certificate is no longer needed, so release it.
            leaf_cert = certificates.pop(0)
            CoreFoundation.CFRelease(leaf_cert)

        # Build the CFArray that holds the trust chain.
        trust_chain = CoreFoundation.CFArrayCreateMutable(
            CoreFoundation.kCFAllocatorDefault,
            0,
            ctypes.byref(CoreFoundation.kCFTypeArrayCallBacks),
        )
        for chain_item in identities + certificates:
            # ArrayAppendValue does a CFRetain on the item. That's fine,
            # because the finally block will release our other refs to them.
            CoreFoundation.CFArrayAppendValue(trust_chain, chain_item)

        return trust_chain
    finally:
        for held_ref in identities + certificates:
            CoreFoundation.CFRelease(held_ref)
|
|
@ -0,0 +1,321 @@
|
|||
"""
|
||||
This module provides a pool manager that uses Google App Engine's
|
||||
`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
|
||||
|
||||
Example usage::
|
||||
|
||||
from urllib3 import PoolManager
|
||||
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
|
||||
|
||||
if is_appengine_sandbox():
|
||||
# AppEngineManager uses AppEngine's URLFetch API behind the scenes
|
||||
http = AppEngineManager()
|
||||
else:
|
||||
# PoolManager uses a socket-level API behind the scenes
|
||||
http = PoolManager()
|
||||
|
||||
r = http.request('GET', 'https://google.com/')
|
||||
|
||||
There are `limitations <https://cloud.google.com/appengine/docs/python/\
|
||||
urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
|
||||
the best choice for your application. There are three options for using
|
||||
urllib3 on Google App Engine:
|
||||
|
||||
1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
|
||||
cost-effective in many circumstances as long as your usage is within the
|
||||
limitations.
|
||||
2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
|
||||
Sockets also have `limitations and restrictions
|
||||
<https://cloud.google.com/appengine/docs/python/sockets/\
|
||||
#limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
|
||||
To use sockets, be sure to specify the following in your ``app.yaml``::
|
||||
|
||||
env_variables:
|
||||
GAE_USE_SOCKETS_HTTPLIB : 'true'
|
||||
|
||||
3. If you are using `App Engine Flexible
|
||||
<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
|
||||
:class:`PoolManager` without any configuration or special environment variables.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
import io
|
||||
import logging
|
||||
import warnings
|
||||
from ..packages.six.moves.urllib.parse import urljoin
|
||||
|
||||
from ..exceptions import (
|
||||
HTTPError,
|
||||
HTTPWarning,
|
||||
MaxRetryError,
|
||||
ProtocolError,
|
||||
TimeoutError,
|
||||
SSLError,
|
||||
)
|
||||
|
||||
from ..request import RequestMethods
|
||||
from ..response import HTTPResponse
|
||||
from ..util.timeout import Timeout
|
||||
from ..util.retry import Retry
|
||||
from . import _appengine_environ
|
||||
|
||||
try:
|
||||
from google.appengine.api import urlfetch
|
||||
except ImportError:
|
||||
urlfetch = None
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AppEnginePlatformWarning(HTTPWarning):
    """Warning category for notices about running on the App Engine platform."""
|
||||
|
||||
|
||||
class AppEnginePlatformError(HTTPError):
    """Error for App Engine platform problems, e.g. URLFetch being unavailable."""
|
||||
|
||||
|
||||
class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:
        * URLFetch is not available.
        * If you attempt to use this on App Engine Flexible, as full socket
          support is available.
        * If a request size is more than 10 megabytes.
        * If a response size is more than 32 megabytes.
        * If you use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.
    """

    def __init__(
        self,
        headers=None,
        retries=None,
        validate_certificate=True,
        urlfetch_retries=True,
    ):
        """
        Create a manager backed by ``urlfetch.fetch``.

        :param headers: Passed unchanged to ``RequestMethods.__init__``.
        :param retries: Default retry configuration stored on the manager;
            ``Retry.DEFAULT`` is used when this is falsy.
        :param validate_certificate: Forwarded to ``urlfetch.fetch`` as
            ``validate_certificate`` on every request.
        :param urlfetch_retries: When true, URLFetch is allowed to follow
            redirects itself (see :meth:`urlopen`).
        :raises AppEnginePlatformError: If the ``urlfetch`` module could not
            be imported, or if ``is_prod_appengine_mvms()`` is true.
        """
        if not urlfetch:
            raise AppEnginePlatformError(
                "URLFetch is not available in this environment."
            )

        if is_prod_appengine_mvms():
            # The adjacent literals are concatenated, so each fragment must
            # carry its own trailing space.
            raise AppEnginePlatformError(
                "Use normal urllib3.PoolManager instead of AppEngineManager "
                "on Managed VMs, as using URLFetch is not necessary in "
                "this environment."
            )

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning,
        )

        RequestMethods.__init__(self, headers)
        self.validate_certificate = validate_certificate
        self.urlfetch_retries = urlfetch_retries

        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        """Return the manager itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the ``with`` block without suppressing any exception."""
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=None,
        redirect=True,
        timeout=Timeout.DEFAULT_TIMEOUT,
        **response_kw
    ):
        """
        Perform a request through ``urlfetch.fetch`` and return the response.

        :param method: HTTP method, forwarded to URLFetch.
        :param url: URL to fetch.
        :param body: Request payload, forwarded as ``payload``.
        :param headers: Request headers; an empty dict is sent when falsy.
        :param retries: A ``Retry`` instance, or a value accepted by
            ``Retry.from_int`` (see :meth:`_get_retries`).
        :param redirect: Whether redirects should be followed.
        :param timeout: Converted by :meth:`_get_absolute_timeout` and
            forwarded as the URLFetch ``deadline``.
        :param response_kw: Extra keyword arguments for ``HTTPResponse``.
        :returns: The ``HTTPResponse`` built by
            :meth:`_urlfetch_response_to_http_response`.
        :raises TimeoutError: On ``urlfetch.DeadlineExceededError``.
        :raises AppEnginePlatformError: For an oversized request or response,
            or a method URLFetch rejects.
        :raises MaxRetryError: When redirects are exhausted.
        :raises ProtocolError: On other invalid-URL or download errors.
        :raises SSLError: On ``urlfetch.SSLCertificateError``.
        """
        retries = self._get_retries(retries, redirect)

        try:
            follow_redirects = redirect and retries.redirect != 0 and retries.total
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            # URLFetch reports an oversized request as an invalid URL; the
            # message text is the only way to tell the two apart here.
            if "too large" in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.",
                    e,
                )
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if "Too many redirects" in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            raise AppEnginePlatformError(
                "URLFetch response too large, URLFetch only supports "
                "responses up to 32mb in size.",
                e,
            )

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e
            )

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw
        )

        # Handle redirect?
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # A redirect response at this point was not followed by URLFetch.
            if self.urlfetch_retries and retries.raise_on_redirect:
                raise MaxRetryError(self, url, "too many redirects")
            else:
                if http_response.status == 303:
                    method = "GET"

                try:
                    retries = retries.increment(
                        method, url, response=http_response, _pool=self
                    )
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method,
                    redirect_url,
                    body,
                    headers,
                    retries=retries,
                    redirect=redirect,
                    timeout=timeout,
                    **response_kw
                )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.getheader("Retry-After"))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method,
                url,
                body=body,
                headers=headers,
                retries=retries,
                redirect=redirect,
                timeout=timeout,
                **response_kw
            )

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
        """
        Wrap a URLFetch response object in a urllib3 ``HTTPResponse``.

        The headers of ``urlfetch_resp`` are adjusted in place before
        wrapping: a ``deflate`` content-encoding is dropped on production
        App Engine, and ``chunked`` is removed from transfer-encoding.

        :param urlfetch_resp: Object returned by ``urlfetch.fetch``; its
            ``content``, ``headers``, ``header_msg`` and ``status_code``
            attributes are read.
        :param response_kw: Extra keyword arguments for ``HTTPResponse``.
        :returns: An ``HTTPResponse`` whose ``original_response`` is a second
            ``HTTPResponse`` built from the same data plus ``header_msg``.
        """
        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get("content-encoding")

            if content_encoding == "deflate":
                del urlfetch_resp.headers["content-encoding"]

        transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        # The header may list several codings ("gzip, chunked"), so filter
        # the list instead of matching the exact string "chunked".
        if transfer_encoding:
            encodings = [enc.strip() for enc in transfer_encoding.split(",")]
            remaining = [enc for enc in encodings if enc.lower() != "chunked"]
            if len(remaining) != len(encodings):
                urlfetch_resp.headers["transfer-encoding"] = ",".join(remaining)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        """
        Reduce a urllib3 timeout to the single value URLFetch accepts.

        :param timeout: ``Timeout.DEFAULT_TIMEOUT``, a ``Timeout`` instance,
            or any other value (returned unchanged).
        :returns: ``None`` for the default sentinel, ``timeout.total`` for a
            ``Timeout`` instance, otherwise ``timeout`` itself.
        """
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning,
                )
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        """
        Normalise ``retries`` to a ``Retry`` instance.

        :param retries: A ``Retry`` instance, or a value accepted by
            ``Retry.from_int`` (``self.retries`` is the default).
        :param redirect: Forwarded to ``Retry.from_int``.
        :returns: The ``Retry`` instance; a warning is issued when it sets
            ``connect``, ``read`` or ``redirect``.
        """
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning,
            )

        return retries
|
||||
|
||||
|
||||
# Alias methods from _appengine_environ to maintain public API interface.
# AppEngineManager calls is_prod_appengine_mvms() and is_prod_appengine()
# through these module-level names.

is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms
|
|
@ -0,0 +1,123 @@
|
|||
"""
|
||||
NTLM authenticating pool, contributed by erikcederstran
|
||||
|
||||
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from logging import getLogger
|
||||
from ntlm import ntlm
|
||||
|
||||
from .. import HTTPSConnectionPool
|
||||
from ..packages.six.moves.http_client import HTTPSConnection
|
||||
|
||||
|
||||
# Module-level logger named after this module; _new_conn() uses it to trace
# each step of the NTLM handshake at debug level.
log = getLogger(__name__)
|
||||
|
||||
|
||||
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = "https"

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.

        Remaining positional and keyword arguments are passed on to
        HTTPSConnectionPool. ``user`` is split on the first backslash; the
        part before it is upper-cased and stored as the domain, and the part
        after it is stored as the user name, so a backslash must be present.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        user_parts = user.split("\\", 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        """
        Open an HTTPS connection and authenticate it with NTLM.

        Sends a negotiate message to ``authurl``, parses the challenge from
        the ``www-authenticate`` response header, then sends the
        authenticate message on the same connection.

        :returns: The authenticated ``HTTPSConnection``.
        :raises Exception: If no NTLM challenge is returned, or the final
            response status is not 200. The connection is closed before the
            error propagates.
        """
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug(
            "Starting NTLM HTTPS connection no. %d: https://%s%s",
            self.num_connections,
            self.host,
            self.authurl,
        )

        headers = {"Connection": "Keep-Alive"}
        req_header = "Authorization"
        resp_header = "www-authenticate"

        conn = HTTPSConnection(host=self.host, port=self.port)

        try:
            # Send negotiation message
            headers[req_header] = "NTLM %s" % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
                self.rawuser
            )
            log.debug("Request headers: %s", headers)
            conn.request("GET", self.authurl, None, headers)
            res = conn.getresponse()
            reshdr = dict(res.getheaders())
            # getheader() matches the name case-insensitively, unlike a
            # lookup in the plain dict built above.
            challenge_header = res.getheader(resp_header)
            log.debug("Response status: %s %s", res.status, res.reason)
            log.debug("Response headers: %s", reshdr)
            log.debug("Response data: %s [...]", res.read(100))

            # Remove the reference to the socket, so that it can not be closed by
            # the response object (we want to keep the socket open)
            res.fp = None

            # Server should respond with a challenge message
            auth_header_value = None
            if challenge_header is not None:
                for s in challenge_header.split(", "):
                    if s[:5] == "NTLM ":
                        auth_header_value = s[5:]
            if auth_header_value is None:
                raise Exception(
                    "Unexpected %s response header: %s"
                    % (resp_header, challenge_header)
                )

            # Send authentication message
            ServerChallenge, NegotiateFlags = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
                auth_header_value
            )
            auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
                ServerChallenge, self.user, self.domain, self.pw, NegotiateFlags
            )
            headers[req_header] = "NTLM %s" % auth_msg
            log.debug("Request headers: %s", headers)
            conn.request("GET", self.authurl, None, headers)
            res = conn.getresponse()
            log.debug("Response status: %s %s", res.status, res.reason)
            log.debug("Response headers: %s", dict(res.getheaders()))
            log.debug("Response data: %s [...]", res.read()[:100])
            if res.status != 200:
                if res.status == 401:
                    raise Exception(
                        "Server rejected request: wrong " "username or password"
                    )
                raise Exception(
                    "Wrong server response: %s %s" % (res.status, res.reason)
                )

            res.fp = None
        except Exception:
            # The connection is never handed to the caller on failure, so
            # close it here rather than leaving the socket open.
            conn.close()
            raise

        log.debug("Connection established")
        return conn

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=3,
        redirect=True,
        assert_same_host=True,
    ):
        """
        Issue a request with ``Connection: Keep-Alive`` forced on.

        The header is set on a copy, so the caller's ``headers`` mapping is
        left unmodified. All arguments are forwarded positionally to
        ``HTTPSConnectionPool.urlopen``, whose result is returned.
        """
        headers = {} if headers is None else headers.copy()
        headers["Connection"] = "Keep-Alive"
        return super(NTLMConnectionPool, self).urlopen(
            method, url, body, headers, retries, redirect, assert_same_host
        )
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue