Here are examples of the Python API sys.maxunicode taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
144 Examples
3
Source : test_codeccallbacks.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_xmlcharrefvalues(self):
    # enhance coverage of:
    # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
    # and inline implementations
    v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
    # Only add the larger code points when sys.maxunicode is big enough
    # for unichr() to accept them.
    if sys.maxunicode >= 100000:
        v += (100000, 500000, 1000000)
    s = u"".join([unichr(x) for x in v])
    # Register the built-in handler under a second name so that both the
    # built-in name and the registered name are exercised below.
    codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
    for enc in ("ascii", "iso-8859-15"):
        for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
            s.encode(enc, err)
def test_decodehelper(self):
3
Source : test_codeccallbacks.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_translatehelper(self):
    # enhance coverage of:
    # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
    # and callers
    # (Unfortunately the errors argument is not directly accessible
    # from Python, so we can't test that much)
    class D(dict):
        # Mapping whose lookups always fail with ValueError.
        def __getitem__(self, key):
            raise ValueError
    self.assertRaises(ValueError, u"\xff".translate, D())
    # A replacement one past sys.maxunicode and a tuple replacement
    # must both be rejected.
    self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
    self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
def test_bug828737(self):
3
Source : test_codecs.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_decode_error_attributes(self):
    # Only runs when sys.maxunicode is above 0xffff.
    if sys.maxunicode > 0xffff:
        try:
            "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
        except UnicodeDecodeError as ex:
            # "as" is accepted by Python 2.6+ and 3.x; the old comma form
            # is a syntax error on Python 3.
            self.assertEqual("unicode_internal", ex.encoding)
            self.assertEqual("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
            self.assertEqual(4, ex.start)
            self.assertEqual(8, ex.end)
        else:
            # Decoding must not succeed.
            self.fail()
def test_decode_callback(self):
3
Source : test_codecs.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_decode_callback(self):
    # Only runs when sys.maxunicode is above 0xffff.
    if sys.maxunicode > 0xffff:
        codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
        decoder = codecs.getdecoder("unicode_internal")
        ab = u"ab".encode("unicode_internal")
        # Insert four 0x22 bytes between the two encoded characters and
        # decode with the "ignore" handler registered above.
        ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
                          "UnicodeInternalTest")
        # The decoder is expected to return u"ab" and report 12 bytes consumed.
        self.assertEqual((u"ab", 12), ignored)
def test_encode_length(self):
3
Source : test_multibytecodec.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_bug1572832(self):
    if sys.maxunicode >= 0x10000:
        myunichr = unichr
    else:
        # unichr() cannot take these code points here, so build the
        # character from two 16-bit units instead.
        myunichr = lambda x: unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF))

    for x in xrange(0x10000, 0x110000):
        # Any ISO 2022 codec will cause the segfault
        myunichr(x).encode('iso_2022_jp', 'ignore')
class TestStateful(unittest.TestCase):
3
Source : test_normalization.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def unistr(data):
    """Convert space-separated hexadecimal code points into a unicode string.

    Raises RangeError when any code point is greater than sys.maxunicode.
    """
    data = [int(x, 16) for x in data.split(" ")]
    for x in data:
        if x > sys.maxunicode:
            raise RangeError
    return u"".join([unichr(x) for x in data])
class NormalizationTest(unittest.TestCase):
3
Source : test_unicode.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_raiseMemError(self):
# Ensure that the freelist contains a consistent object, even
# when a string allocation fails with a MemoryError.
# This used to crash the interpreter,
# or leak references when the number was smaller.
charwidth = 4 if sys.maxunicode >= 0x10000 else 2
# Note: sys.maxsize is half of the actual max allocation because of
# the signedness of Py_ssize_t.
alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
self.assertRaises(MemoryError, alloc)
self.assertRaises(MemoryError, alloc)
def test_format_subclass(self):
3
Source : test_unicodedata.py
with GNU General Public License v3.0
from adityaprakash-bobby
with GNU General Public License v3.0
from adityaprakash-bobby
def test_bug_5828(self):
    self.assertEqual(u"\u1d79".lower(), u"\u1d79")
    # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
    self.assertEqual(
        [
            c for c in range(sys.maxunicode+1)
            if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
        ],
        [0]
    )
def test_bug_4971(self):
3
Source : bleu_hook.py
with MIT License
from akzaidi
with MIT License
from akzaidi
def property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))
uregex = UnicodeRegex()
3
Source : tokenizer.py
with Apache License 2.0
from Ascend
with Apache License 2.0
from Ascend
def get_unicode_categories():
    """Group every code point up to sys.maxunicode by its Unicode category.

    Returns:
        A defaultdict mapping each category code (as returned by
        ``unicodedata.category``) to the list of characters in it,
        in code point order.
    """
    import sys
    from collections import defaultdict
    import unicodedata
    cats = defaultdict(list)
    for c in map(chr, range(sys.maxunicode + 1)):
        cats[unicodedata.category(c)].append(c)
    return cats
NUMERICS = ''.join(get_unicode_categories()['No'])
3
Source : compute_bleu.py
with Apache License 2.0
from Ascend
with Apache License 2.0
from Ascend
def property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(
        c for c in chars
        if unicodedata.category(c).startswith(prefix))
uregex = UnicodeRegex()
3
Source : tokenizer.py
with Apache License 2.0
from Ascend
with Apache License 2.0
from Ascend
def alphanumeric_char_set():
    """Return the set of characters whose Unicode category starts with "L" or "N"."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(i) for i in xrange(sys.maxunicode + 1))
    # One category lookup per character; startswith accepts a tuple.
    return set(
        c for c in chars
        if unicodedata.category(c).startswith(("L", "N")))
# Set contains all letter and number characters.
_ALPHANUMERIC_CHAR_SET = alphanumeric_char_set()
3
Source : markup_element.py
with Apache License 2.0
from asvbkr
with Apache License 2.0
from asvbkr
def get_markup_text(cls, text, markup):
    # type: (str, MarkupElement) -> str
    """Return the part of ``text`` described by ``markup``.

    ``markup._from`` and ``markup.length`` are applied as offsets into the
    UTF-16 encoding of ``text`` (two bytes per unit).

    Raises:
        RuntimeError: if ``text`` is empty or None.
    """
    if not text:
        raise RuntimeError("This Message has no 'text'.")
    # Is it a narrow build, if so we don't need to convert
    if sys.maxunicode == 0xFFFF:
        return text[markup._from: markup._from + markup.length]
    entity_text = text.encode('utf-16-le')
    # Each UTF-16 unit is two bytes, so both bounds are doubled.
    entity_text = entity_text[markup._from * 2: (markup._from + markup.length) * 2]
    return entity_text.decode('utf-16-le')
@classmethod
3
Source : bleu_transformer.py
with Apache License 2.0
from asyml
with Apache License 2.0
from asyml
def property_chars(prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (chr(x) for x in range(sys.maxunicode + 1))
    return "".join(
        c for c in chars
        if unicodedata.category(c).startswith(prefix)
    )
uregex = UnicodeRegex()
3
Source : parser.py
with MIT License
from autofelix
with MIT License
from autofelix
def _replace_unicode(match):
    """Turn a regex match whose first group is a hex code point into a character.

    Code points greater than sys.maxunicode are replaced with U+FFFD.
    """
    codepoint = int(match.group(1), 16)
    if codepoint > sys.maxunicode:
        codepoint = 0xFFFD
    return _unichr(codepoint)
def unescape_ident(value):
3
Source : message_entities.py
with GNU General Public License v3.0
from autogram
with GNU General Public License v3.0
from autogram
def parse_entity_text(entity: MessageEntity, message_text: str) -> str:
    """Return the part of ``message_text`` described by ``entity``.

    ``entity.offset`` and ``entity.length`` are applied as offsets into the
    UTF-16 encoding of ``message_text`` (two bytes per unit).
    """
    # Is it a narrow build, if so we don't need to convert
    if sys.maxunicode == 0xFFFF:
        return message_text[entity.offset : entity.offset + entity.length]
    else:
        entity_text = message_text.encode("utf-16-le")
        # Each UTF-16 unit is two bytes, so both bounds are doubled.
        entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
        return entity_text.decode("utf-16-le")
@dataclass
3
Source : test_builtin.py
with MIT License
from bkerler
with MIT License
from bkerler
def test_hasattr(self):
self.assertTrue(hasattr(sys, 'stdout'))
self.assertRaises(TypeError, hasattr, sys, 1)
self.assertRaises(TypeError, hasattr)
self.assertEqual(False, hasattr(sys, chr(sys.maxunicode)))
# Check that hasattr propagates all exceptions outside of
# AttributeError.
class A:
def __getattr__(self, what):
raise SystemExit
self.assertRaises(SystemExit, hasattr, A(), "b")
class B:
def __getattr__(self, what):
raise ValueError
self.assertRaises(ValueError, hasattr, B(), "b")
def test_hash(self):
3
Source : test_codeccallbacks.py
with MIT License
from bkerler
with MIT License
from bkerler
def test_translatehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
# and callers
# (Unfortunately the errors argument is not directly accessible
# from Python, so we can't test that much)
class D(dict):
def __getitem__(self, key):
raise ValueError
#self.assertRaises(ValueError, "\xff".translate, D())
self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
def test_bug828737(self):
3
Source : find-uname.py
with MIT License
from Dsa-Terminal
with MIT License
from Dsa-Terminal
def main(args):
    """Print the Unicode character names that match each regex in ``args``.

    Each pattern is searched case-insensitively in every character name.
    For a pattern with at least one match, a header line is printed,
    followed by one ``NAME (codepoint)`` line per matching character.
    """
    if not args:
        # Nothing to search for, so skip the full code point scan.
        return
    unicode_names = []
    for ix in range(sys.maxunicode + 1):
        # Use the default argument instead of catching ValueError for
        # every unnamed code point.
        name = unicodedata.name(chr(ix), None)
        if name is not None:
            unicode_names.append((ix, name))
    for arg in args:
        pat = re.compile(arg, re.I)
        matches = [(name, ix) for (ix, name) in unicode_names
                   if pat.search(name) is not None]
        if matches:
            print("***", arg, "matches", "***")
            for match in matches:
                print("%s (%d)" % match)
if __name__ == "__main__":
3
Source : bleu.py
with Apache License 2.0
from eric-haibin-lin
with Apache License 2.0
from eric-haibin-lin
def _property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return ''.join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))
unicodeRegex = UnicodeRegex()
3
Source : test_codeccallbacks.py
with MIT License
from fbla-competitive-events
with MIT License
from fbla-competitive-events
def test_translatehelper(self):
    # enhance coverage of:
    # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
    # and callers
    # (Unfortunately the errors argument is not directly accessible
    # from Python, so we can't test that much)
    class D(dict):
        # Mapping whose lookups always fail with ValueError.
        # NOTE(review): only referenced by the disabled assertion below.
        def __getitem__(self, key):
            raise ValueError
    #self.assertRaises(ValueError, "\xff".translate, D())
    # NOTE(review): which exception an out-of-range replacement raises
    # depends on the interpreter version -- confirm TypeError is right
    # for the version this suite targets.
    self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
    self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
def test_bug828737(self):
3
Source : categories.py
with MIT License
from fluentpython
with MIT License
from fluentpython
def category_stats():
    """Count code points per Unicode category.

    Returns:
        A ``(counts, firsts)`` pair: ``counts`` is a Counter keyed by
        category, and ``firsts`` maps each category to the first
        character seen in it (lowest code point).
    """
    counts = collections.Counter()
    firsts = {}
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        cat = category(char)
        # Record the character before counting, so only the first one
        # seen for a category is kept.
        if cat not in counts:
            firsts[cat] = char
        counts[cat] += 1
    return counts, firsts
def category_scan(desired):
3
Source : categories.py
with MIT License
from fluentpython
with MIT License
from fluentpython
def category_scan(desired):
    """Yield, in code point order, each character whose category equals ``desired``."""
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        if category(char) == desired:
            yield char
def main(args):
3
Source : test_htmlparser.py
with Apache License 2.0
from gethue
with Apache License 2.0
from gethue
def test_wide_unicode_xml(self):
    if sys.maxunicode < 1114111:
        return # skip test
    # The \U escape is kept as literal text in the byte string and is
    # expanded by the unicode_escape decode.
    element = self.etree.HTML(_bytes(
        '<html><body><p>\\U00026007</p></body></html>'
        ).decode('unicode_escape'))
    p_text = element.findtext('.//p')
    # The paragraph text must be a single character.
    self.assertEqual(1, len(p_text))
    self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
                     p_text)
def test_module_HTML_pretty_print(self):
3
Source : test_unicode.py
with Apache License 2.0
from gethue
with Apache License 2.0
from gethue
def test_wide_unicode_xml(self):
    if sys.maxunicode < 1114111:
        return # skip test
    # The \U escape is kept as literal text in the byte string and is
    # expanded by the unicode_escape decode.
    tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
    # The element text must be a single character.
    self.assertEqual(1, len(tree.text))
    self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
                     tree.text)
def test_unicode_xml_broken(self):
3
Source : punctuation.py
with MIT License
from kemingy
with MIT License
from kemingy
def get_punc_map(self, repl=" "):
    """Return a dict mapping each punctuation code point to ``repl``.

    The code point list is computed on first use and stored in
    ``self.punc``; one map per distinct ``repl`` is cached in
    ``self.punc_map``.
    """
    if not self.punc:
        # sys.maxunicode is itself a valid code point, hence the + 1.
        self.punc = [
            c
            for c in range(sys.maxunicode + 1)
            if unicodedata.category(chr(c)).startswith("P")
        ]
    if repl not in self.punc_map:
        # Map every code point to the whole replacement string. Zipping
        # against ``repl * len(...)`` splits a multi-character ``repl``
        # into single characters and yields an empty map for "".
        self.punc_map[repl] = dict.fromkeys(self.punc, repl)
    return self.punc_map[repl]
def remove(self, text, repl=" "):
3
Source : ebcdic_parser.py
with MIT License
from larandvit
with MIT License
from larandvit
def buildControlCharRegex(self):
    """Return a compiled regex matching any single character of Unicode category ``Cc``."""
    # create a list of control (unreadable/invisible) characters
    # sys.maxunicode is itself a valid code point, hence the + 1.
    allChars = (chr(i) for i in range(sys.maxunicode + 1))
    controlChars = ''.join(c for c in allChars if unicodedata.category(c) == 'Cc')
    return re.compile('[%s]' % re.escape(controlChars))
def release(self):
3
Source : bleu_tool.py
with MIT License
from LiqunChen0606
with MIT License
from LiqunChen0606
def property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    #pylint:disable=no-self-use
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))
uregex = UnicodeRegex()
3
Source : sequence_generator.py
with Apache License 2.0
from mlbench
with Apache License 2.0
from mlbench
def property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(
        c for c in chars
        if unicodedata.category(c).startswith(prefix)
    )
uregex = UnicodeRegex()
3
Source : sequence_generator.py
with Apache License 2.0
from mlbench
with Apache License 2.0
from mlbench
def property_chars(self, prefix):
    """Return, as one string, every character whose Unicode category starts with ``prefix``."""
    # sys.maxunicode is itself a valid code point, hence the + 1.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(
        c for c in chars
        if unicodedata.category(c).startswith(prefix)
    )
class SequenceGenerator(object):
3
Source : StarFile.py
with MIT License
from pedrobcst
with MIT License
from pedrobcst
def set_characterset(self,characterset):
    """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
    import sys
    self.characterset = characterset
    # "(", ")" and "|" need no backslash inside a character class, and
    # writing them bare avoids invalid string escapes in these non-raw
    # literals. Any other characterset value leaves char_check untouched.
    if characterset == 'ascii':
        self.char_check = re.compile("[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_-]+",re.M)
    elif characterset == 'unicode':
        if sys.maxunicode < 1114111:
            # Narrow build: leave out the range above U+FFFF.
            self.char_check = re.compile(u"[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
        else:
            self.char_check = re.compile(u"[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)
def __str__(self):
3
Source : index_demo.py
with BSD 3-Clause "New" or "Revised" License
from ramalho
with BSD 3-Clause "New" or "Revised" License
from ramalho
def build_index():
    """Build an index from each word of a character's Unicode name to that character.

    Code points from 32 through sys.maxunicode are scanned; characters
    without a name add nothing to the index.
    """
    idx = index.Index()
    for i in range(32, sys.maxunicode + 1):
        char = chr(i)
        # The "" default means unnamed characters split into no words.
        for word in unicodedata.name(char, "").split():
            idx.add(word, char)
    return idx
def find(words):
3
Source : __init__.py
with Apache License 2.0
from Richienb
with Apache License 2.0
from Richienb
def unilimit():
    """
    Return the highest Unicode code point value (``sys.maxunicode``).
    """
    return sys.maxunicode
def pyversion(part=None):
3
Source : writer.py
with GNU Affero General Public License v3.0
from singer-io
with GNU Affero General Public License v3.0
from singer-io
def test_unicode_characters(quoting: int = csv.QUOTE_MINIMAL):
    """Write one CSV row per code point to ``test_unicode_characters.csv``.

    Args:
        quoting: one of the ``csv.QUOTE_*`` constants, passed to the writer.
    """
    # newline='' is what the csv module asks for on files given to a writer.
    with open('test_unicode_characters.csv', 'w', newline='') as file:
        write = csv.writer(file, quoting=quoting)
        write.writerow(["unicode character test", "character"])
        # sys.maxunicode is itself a valid code point, hence the + 1.
        for char in range(sys.maxunicode + 1):
            try:
                write.writerow([chr(char), char])
            except UnicodeEncodeError:
                # Characters the file's encoding cannot represent are
                # deliberately skipped.
                pass
def test_bigint_valid_range():
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_get_code_point_range(self):
    # A single code point is expected to become a (start, stop) pair;
    # pairs and lists are returned as given.
    self.assertEqual(get_code_point_range(97), (97, 98))
    self.assertEqual(get_code_point_range((97, 100)), (97, 100))
    self.assertEqual(get_code_point_range([97, 100]), [97, 100])

    # Out-of-range and non-integer values are expected to give None.
    self.assertIsNone(get_code_point_range(-1))
    self.assertIsNone(get_code_point_range(sys.maxunicode + 1))
    self.assertIsNone(get_code_point_range((-1, 100)))
    self.assertIsNone(get_code_point_range((97, sys.maxunicode + 2)))
    self.assertIsNone(get_code_point_range(97.0))
    self.assertIsNone(get_code_point_range((97.0, 100)))
class TestParseCharacterSubset(unittest.TestCase):
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_creation(self):
    # Build from a list of code points and (start, stop) pairs.
    subset = UnicodeSubset([(0, 9), 11, 12, (14, 32), (33, sys.maxunicode + 1)])
    self.assertEqual(subset, [(0, 9), 11, 12, (14, 32), (33, sys.maxunicode + 1)])
    # Build from a character-class style string.
    self.assertEqual(UnicodeSubset('0-9'), [(48, 58)])
    self.assertEqual(UnicodeSubset('0-9:'), [(48, 59)])

    # Build from another subset.
    subset = UnicodeSubset('a-z')
    self.assertEqual(UnicodeSubset(subset), [(ord('a'), ord('z') + 1)])
def test_repr(self):
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_repr(self):
    self.assertEqual(code_point_repr((ord('2'), ord('\\') + 1)), r'2-\\')

    subset = UnicodeSubset('a-z')
    self.assertEqual(repr(subset), "UnicodeSubset('a-z')")
    self.assertEqual(str(subset), "a-z")

    subset = UnicodeSubset((50, 90))
    subset.codepoints.append(sys.maxunicode + 10)  # Invalid subset
    self.assertRaises(ValueError, repr, subset)
def test_modify(self):
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_complement(self):
    char_class = CharacterClass('a-z')
    self.assertListEqual(char_class.positive.codepoints, [(97, 123)])
    self.assertListEqual(char_class.negative.codepoints, [])

    # complement() is expected to swap the positive and negative parts.
    char_class.complement()
    self.assertListEqual(char_class.positive.codepoints, [])
    self.assertListEqual(char_class.negative.codepoints, [(97, 123)])
    self.assertEqual(str(char_class), '[^a-z]')

    # The complement of an empty class is expected to hold every code point.
    char_class = CharacterClass()
    char_class.complement()
    self.assertEqual(len(char_class), sys.maxunicode + 1)
def test_isub_operator(self):
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_iterate(self):
    char_class = CharacterClass('A-Za-z')
    self.assertEqual(''.join(chr(c) for c in char_class),
                     string.ascii_uppercase + string.ascii_lowercase)

    # After complementing, iteration is expected to yield every code
    # point except the ASCII letters.
    char_class.complement()
    self.assertEqual(len(''.join(chr(c) for c in char_class)),
                     sys.maxunicode + 1 - len(string.ascii_letters))
def test_length(self):
3
Source : test_regex.py
with MIT License
from sissaschool
with MIT License
from sissaschool
def test_unicode_categories(self):
    # Entries whose key is longer than one character must together
    # account for every code point exactly once.
    self.assertEqual(sum(len(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1),
                     sys.maxunicode + 1)
    self.assertEqual(min([min(s) for s in UNICODE_CATEGORIES.values()]), 0)
    self.assertEqual(max([max(s) for s in UNICODE_CATEGORIES.values()]), sys.maxunicode)

    # No two of those entries may share a code point.
    base_sets = [set(v) for k, v in UNICODE_CATEGORIES.items() if len(k) > 1]
    self.assertFalse(any(s.intersection(t) for s in base_sets for t in base_sets if s != t))
@unittest.skipIf(not ((3, 8) < = sys.version_info < (3, 9)), "Test only for Python 3.8")
3
Source : unicode.py
with MIT License
from smola
with MIT License
from smola
def unichr(ch):
    """Return the string for code point ``ch``.

    Code points above sys.maxunicode are returned as a two-unit
    surrogate pair.
    """
    if ch <= sys.maxunicode:
        return _unichr(ch)
    else:
        ch -= 0x10000
        # High surrogate from the top bits, low surrogate from the low 10 bits.
        return _unichr((ch >> 10) + 0xD800) + _unichr((ch & ((1 << 10) - 1)) + 0xDC00)
def u(s):
3
Source : test_ajit.py
with MIT License
from soIu
with MIT License
from soIu
def test_unichar_ord_is_never_signed_on_64bit(self):
    import sys
    if sys.maxunicode == 0xffff:
        py.test.skip("test for 32-bit unicodes")

    def f(x):
        return ord(rffi.cast(lltype.UniChar, x))

    res = self.interp_operations(f, [-1])
    # The expected value differs between 32-bit and 64-bit sys.maxint.
    if sys.maxint == 2147483647:
        assert res == -1
    else:
        assert res == 4294967295
def test_issue2200_recursion(self):
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_unichr():
    assert runicode.UNICHR(0xffff) == u'\uffff'
    if runicode.MAXUNICODE > 0xffff:
        # What UNICHR returns depends on whether the host build is narrow.
        if sys.maxunicode < 0x10000:
            assert runicode.UNICHR(0x10000) == u'\ud800\udc00'
        else:
            assert runicode.UNICHR(0x10000) == u'\U00010000'
    else:
        py.test.raises(ValueError, runicode.UNICHR, 0x10000)
    py.test.raises(TypeError, runicode.UNICHR, 'abc')
def test_ord():
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_ord():
    assert runicode.ORD('a') == 97
    assert runicode.ORD(u'a') == 97
    assert runicode.ORD(u'\uffff') == 0xffff
    if runicode.MAXUNICODE > 0xffff:
        # How U+10000 is written depends on whether the host build is narrow.
        if sys.maxunicode < 0x10000:
            assert runicode.ORD(u'\ud800\udc00') == 0x10000
        else:
            assert runicode.ORD(u'\U00010000') == 0x10000
    else:
        py.test.raises(TypeError, runicode.ORD, u'\ud800\udc00')
    py.test.raises(TypeError, runicode.ORD, 'abc')
class UnicodeTests(object):
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_random(self):
    for i in range(10000):
        v = random.randrange(sys.maxunicode)
        # Skip the surrogate range.
        if 0xd800 <= v <= 0xdfff:
            continue
        uni = unichr(v)
        # Compare version tuples: comparing sys.version as a string
        # orders "2.10" before "2.7".
        if sys.version_info >= (2, 7):
            self.checkdecode(uni, "utf-7")
        for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                         "utf-32 utf-32-be utf-32-le").split():
            self.checkdecode(uni, encoding)
# Same as above, but uses Hypothesis to generate non-surrogate unicode
# characters.
@settings(max_examples=10000)
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_maxunicode(self):
    uni = unichr(sys.maxunicode)
    # Compare version tuples: comparing sys.version as a string
    # orders "2.10" before "2.7".
    if sys.version_info >= (2, 7):
        self.checkdecode(uni, "utf-7")
    for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                     "utf-32 utf-32-be utf-32-le").split():
        self.checkdecode(uni, encoding)
def test_ascii_error(self):
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_random(self):
    for i in range(10000):
        v = random.randrange(sys.maxunicode)
        # Skip the surrogate range.
        if 0xd800 <= v <= 0xdfff:
            continue
        uni = unichr(v)
        # Compare version tuples: comparing sys.version as a string
        # orders "2.10" before "2.7".
        if sys.version_info >= (2, 7):
            self.checkencode(uni, "utf-7")
        for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                         "utf-32 utf-32-be utf-32-le").split():
            self.checkencode(uni, encoding)
def test_maxunicode(self):
3
Source : test_runicode.py
with MIT License
from soIu
with MIT License
from soIu
def test_maxunicode(self):
    uni = unichr(sys.maxunicode)
    # Compare version tuples: comparing sys.version as a string
    # orders "2.10" before "2.7".
    if sys.version_info >= (2, 7):
        self.checkencode(uni, "utf-7")
    for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                     "utf-32 utf-32-be utf-32-le").split():
        self.checkencode(uni, encoding)
def test_empty(self):
3
Source : test_rutf8.py
with MIT License
from soIu
with MIT License
from soIu
def _test_check_utf8(s, allow_surrogates):
    """Check that rutf8._check_utf8 agrees with runicode.str_decode_utf_8 on ``s``."""
    try:
        u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True,
                                         allow_surrogates=allow_surrogates)
        valid = True
    except UnicodeDecodeError as e:
        valid = False
        # Save the position now: on Python 3 the name bound by "as" is
        # cleared when the except block ends.
        error_start = e.start
    length = rutf8._check_utf8(s, allow_surrogates, 0, len(s))
    if length < 0:
        # A negative result is compared, bit-inverted, with the decoder's
        # error position.
        assert not valid
        assert ~(length) == error_start
    else:
        assert valid
        if sys.maxunicode == 0x10FFFF or not _has_surrogates(s):
            assert length == len(u)
@given(strategies.characters())
3
Source : test_rutf8.py
with MIT License
from soIu
with MIT License
from soIu
def test_check_newline_utf8():
    # Compare rutf8.islinebreak with the unicodedb answer for every code
    # point; sys.maxunicode is itself a valid code point, hence the + 1.
    for i in xrange(sys.maxunicode + 1):
        if runicode.unicodedb.islinebreak(i):
            assert rutf8.islinebreak(unichr(i).encode('utf8'), 0)
        else:
            assert not rutf8.islinebreak(unichr(i).encode('utf8'), 0)
def test_isspace_utf8():
See More Examples