sys.maxunicode

Here are examples of the Python API `sys.maxunicode`, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

144 Examples 7

3 Source : test_codeccallbacks.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_xmlcharrefvalues(self):
        # Improves coverage of
        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
        # and of the inline implementations.
        codepoints = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
        if sys.maxunicode >= 100000:
            # The larger values are only added when sys.maxunicode allows them.
            codepoints = codepoints + (100000, 500000, 1000000)
        text = u"".join(map(unichr, codepoints))
        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
        encodings = ("ascii", "iso-8859-15")
        handlers = ("xmlcharrefreplace", "test.xmlcharrefreplace")
        for encoding in encodings:
            for handler in handlers:
                # No result is checked; the call only has to complete.
                text.encode(encoding, handler)

    def test_decodehelper(self):

3 Source : test_codeccallbacks.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_translatehelper(self):
        # Improves coverage of
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and its callers.
        # (The errors argument is not directly accessible from Python,
        # so not much more can be tested here.)
        class RaisingDict(dict):
            # Every lookup fails, so translate() must propagate the error.
            def __getitem__(self, key):
                raise ValueError
        translate = u"\xff".translate
        self.assertRaises(ValueError, translate, RaisingDict())
        # A replacement value one past sys.maxunicode is rejected.
        self.assertRaises(TypeError, translate, {0xff: sys.maxunicode + 1})
        # A tuple is not an accepted replacement value.
        self.assertRaises(TypeError, translate, {0xff: ()})

    def test_bug828737(self):

3 Source : test_codecs.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_decode_error_attributes(self):
        # Only exercised when sys.maxunicode is above 0xffff.
        if sys.maxunicode <= 0xffff:
            return
        raw = "\x00\x00\x00\x00\x00\x11\x11\x00"
        try:
            raw.decode("unicode_internal")
        except UnicodeDecodeError as ex:
            # The exception must describe the codec, the input and the
            # offending byte range.
            self.assertEqual("unicode_internal", ex.encoding)
            self.assertEqual(raw, ex.object)
            self.assertEqual(4, ex.start)
            self.assertEqual(8, ex.end)
        else:
            # Decoding without an error is a failure.
            self.fail()

    def test_decode_callback(self):

3 Source : test_codecs.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_decode_callback(self):
        # Only exercised when sys.maxunicode is above 0xffff.
        if sys.maxunicode <= 0xffff:
            return
        codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
        decode = codecs.getdecoder("unicode_internal")
        encoded = u"ab".encode("unicode_internal")
        # Four \x22 bytes are spliced in after the first four encoded bytes.
        payload = "%s\x22\x22\x22\x22%s" % (encoded[:4], encoded[4:])
        result = decode(payload, "UnicodeInternalTest")
        self.assertEqual((u"ab", 12), result)

    def test_encode_length(self):

3 Source : test_multibytecodec.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_bug1572832(self):
        if sys.maxunicode >= 0x10000:
            to_text = unichr
        else:
            # unichr() cannot produce these values directly here, so build
            # a two-unit surrogate pair instead.
            def to_text(codepoint):
                return (unichr(0xD7C0 + (codepoint >> 10)) +
                        unichr(0xDC00 + (codepoint & 0x3FF)))

        for codepoint in xrange(0x10000, 0x110000):
            # Any ISO 2022 codec will cause the segfault
            to_text(codepoint).encode('iso_2022_jp', 'ignore')

class TestStateful(unittest.TestCase):

3 Source : test_normalization.py
with GNU General Public License v3.0
from adityaprakash-bobby

def unistr(data):
    # Turn a space-separated list of hexadecimal code points into a
    # unicode string; raise RangeError if any exceeds sys.maxunicode.
    codepoints = [int(token, 16) for token in data.split(" ")]
    if any(codepoint > sys.maxunicode for codepoint in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))

class NormalizationTest(unittest.TestCase):

3 Source : test_unicode.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_raiseMemError(self):
        # Ensure that the freelist contains a consistent object, even
        # when a string allocation fails with a MemoryError.
        # This used to crash the interpreter,
        # or leak references when the number was smaller.
        if sys.maxunicode >= 0x10000:
            width = 4
        else:
            width = 2

        def allocate():
            # Note: sys.maxsize is half of the actual max allocation because
            # of the signedness of Py_ssize_t.
            return u"a" * (sys.maxsize // width * 2)

        # The failing allocation is attempted twice in a row.
        for _ in range(2):
            self.assertRaises(MemoryError, allocate)

    def test_format_subclass(self):

3 Source : test_unicodedata.py
with GNU General Public License v3.0
from adityaprakash-bobby

    def test_bug_5828(self):
        self.assertEqual(u"\u1d79".lower(), u"\u1d79")
        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
        nul = u"\x00"
        offenders = []
        for code in range(sys.maxunicode + 1):
            char = unichr(code)
            if nul in char.lower() + char.upper() + char.title():
                offenders.append(code)
        self.assertEqual(offenders, [0])

    def test_bug_4971(self):

3 Source : bleu_hook.py
with MIT License
from akzaidi

  def property_chars(self, prefix):
    """Return all characters whose Unicode category starts with `prefix`.

    Args:
      prefix: category prefix compared with `str.startswith`.

    Returns:
      One string with the matching characters in code-point order.
    """
    # sys.maxunicode is itself a valid code point, so the range is inclusive.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(c for c in chars
                   if unicodedata.category(c).startswith(prefix))


# Module-level UnicodeRegex instance, created once at import time.
uregex = UnicodeRegex()

3 Source : tokenizer.py
with Apache License 2.0
from Ascend

def get_unicode_categories():
    """Group every character up to sys.maxunicode by its Unicode category.

    Returns a defaultdict(list) that maps each category code to the list of
    characters in that category, in code-point order.
    """
    import sys
    import unicodedata
    from collections import defaultdict
    by_category = defaultdict(list)
    for code_point in range(sys.maxunicode + 1):
        char = chr(code_point)
        by_category[unicodedata.category(char)].append(char)
    return by_category

# Every character in Unicode category 'No', joined into one string at import time.
NUMERICS = ''.join(get_unicode_categories()['No'])

3 Source : compute_bleu.py
with Apache License 2.0
from Ascend

  def property_chars(self, prefix):
    """Return all characters whose Unicode category starts with `prefix`.

    Args:
      prefix: category prefix compared with `str.startswith`.

    Returns:
      One string with the matching characters in code-point order.
    """
    # sys.maxunicode is itself a valid code point, so the range is inclusive.
    chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
    return "".join(
        c for c in chars
        if unicodedata.category(c).startswith(prefix))


# Module-level UnicodeRegex instance, created once at import time.
uregex = UnicodeRegex()

3 Source : tokenizer.py
with Apache License 2.0
from Ascend

def alphanumeric_char_set():
  """Return the set of characters whose Unicode category starts with L or N.

  Every code point from 0 through sys.maxunicode (inclusive) is examined.
  """
  # Convert each code point once and test both prefixes in a single call.
  chars = (six.unichr(i) for i in xrange(sys.maxunicode + 1))
  return set(
      c for c in chars
      if unicodedata.category(c).startswith(("L", "N")))


# Set of all letter and number characters, computed once at import time by
# alphanumeric_char_set().
_ALPHANUMERIC_CHAR_SET = alphanumeric_char_set()

3 Source : markup_element.py
with Apache License 2.0
from asvbkr

    def get_markup_text(cls, text, markup):
        # type: (str, MarkupElement) -> str
        # Returns the part of `text` selected by `markup._from` and
        # `markup.length`; raises RuntimeError when `text` is empty.
        if not text:
            raise RuntimeError("This Message has no 'text'.")

        start = markup._from
        stop = start + markup.length

        # On a narrow build the string can be sliced directly.
        if sys.maxunicode == 0xFFFF:
            return text[start:stop]

        # Otherwise slice the UTF-16-LE bytes, two bytes per offset unit.
        utf16 = text.encode('utf-16-le')
        return utf16[2 * start:2 * stop].decode('utf-16-le')

    @classmethod

3 Source : bleu_transformer.py
with Apache License 2.0
from asyml

    def property_chars(prefix):
        """Return all characters whose Unicode category starts with `prefix`.

        Every code point from 0 through sys.maxunicode (inclusive) is
        examined; matches are joined in code-point order.
        """
        # sys.maxunicode is itself a valid code point, so the range is inclusive.
        chars = map(chr, range(sys.maxunicode + 1))
        return "".join(
            c for c in chars if unicodedata.category(c).startswith(prefix)
        )


# Module-level UnicodeRegex instance, created once at import time.
uregex = UnicodeRegex()

3 Source : parser.py
with MIT License
from autofelix

def _replace_unicode(match):
    """Convert the hex digits in group 1 of `match` to a character.

    Values above sys.maxunicode are replaced by U+FFFD.
    """
    value = int(match.group(1), 16)
    return _unichr(0xFFFD if value > sys.maxunicode else value)


def unescape_ident(value):

3 Source : message_entities.py
with GNU General Public License v3.0
from autogram

def parse_entity_text(entity: MessageEntity, message_text: str) -> str:
    """Return the part of `message_text` selected by `entity`.

    The span starts at `entity.offset` and covers `entity.length` units.
    """
    begin = entity.offset
    finish = begin + entity.length

    # On a narrow build the string can be sliced directly.
    if sys.maxunicode == 0xFFFF:
        return message_text[begin:finish]

    # Otherwise slice the UTF-16-LE bytes, two bytes per offset unit.
    encoded = message_text.encode("utf-16-le")
    return encoded[2 * begin : 2 * finish].decode("utf-16-le")


@dataclass

3 Source : test_builtin.py
with MIT License
from bkerler

    def test_hasattr(self):
        self.assertTrue(hasattr(sys, 'stdout'))
        self.assertRaises(TypeError, hasattr, sys, 1)
        self.assertRaises(TypeError, hasattr)
        self.assertEqual(False, hasattr(sys, chr(sys.maxunicode)))

        # Check that hasattr propagates all exceptions outside of
        # AttributeError.
        class A:
            def __getattr__(self, what):
                raise SystemExit
        self.assertRaises(SystemExit, hasattr, A(), "b")
        class B:
            def __getattr__(self, what):
                raise ValueError
        self.assertRaises(ValueError, hasattr, B(), "b")

    def test_hash(self):

3 Source : test_codeccallbacks.py
with MIT License
from bkerler

    def test_translatehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers
        # (Unfortunately the errors argument is not directly accessible
        # from Python, so we can't test that much)
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        #self.assertRaises(ValueError, "\xff".translate, D())
        self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1})
        self.assertRaises(TypeError, "\xff".translate, {0xff: ()})

    def test_bug828737(self):

3 Source : find-uname.py
with MIT License
from Dsa-Terminal

def main(args):
    """Print the named Unicode characters whose names match each pattern.

    Args:
        args: iterable of regular-expression strings; each is compiled
            case-insensitively and searched for in every character name.

    For each pattern with at least one match, a header line is printed,
    followed by one "NAME (code point)" line per match.
    """
    unicode_names = []
    for ix in range(sys.maxunicode + 1):
        # Supplying a default avoids raising and catching ValueError for
        # every code point that has no name.
        name = unicodedata.name(chr(ix), None)
        if name is not None:
            unicode_names.append((ix, name))
    for arg in args:
        pat = re.compile(arg, re.I)
        matches = [(name, ix) for (ix, name) in unicode_names
                   if pat.search(name)]
        if matches:
            print("***", arg, "matches", "***")
            for match in matches:
                print("%s (%d)" % match)

if __name__ == "__main__":

3 Source : bleu.py
with Apache License 2.0
from eric-haibin-lin

    def _property_chars(self, prefix):
        """Return all characters whose Unicode category starts with `prefix`.

        Every code point from 0 through sys.maxunicode (inclusive) is
        examined; matches are joined in code-point order.
        """
        # sys.maxunicode is itself a valid code point, so the range is inclusive.
        chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
        return ''.join(c for c in chars
                       if unicodedata.category(c).startswith(prefix))


# Module-level UnicodeRegex instance, created once at import time.
unicodeRegex = UnicodeRegex()

3 Source : test_codeccallbacks.py
with MIT License
from fbla-competitive-events

    def test_translatehelper(self):
        # Improves coverage of
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and its callers.
        # (The errors argument is not directly accessible from Python,
        # so not much more can be tested here.)
        class RaisingDict(dict):
            def __getitem__(self, key):
                raise ValueError
        translate = "\xff".translate
        #self.assertRaises(ValueError, translate, RaisingDict())
        # A replacement value one past sys.maxunicode is rejected.
        self.assertRaises(TypeError, translate, {0xff: sys.maxunicode + 1})
        # A tuple is not an accepted replacement value.
        self.assertRaises(TypeError, translate, {0xff: ()})

    def test_bug828737(self):

3 Source : categories.py
with MIT License
from fluentpython

def category_stats():
    """Count characters per Unicode category over the whole code-point range.

    Returns a pair (counts, firsts): `counts` is a Counter keyed by category
    and `firsts` maps each category to the first character seen in it.
    """
    counts = collections.Counter()
    firsts = {}
    for char in map(chr, range(sys.maxunicode + 1)):
        cat = category(char)
        # Both mappings gain a category at the same moment, so setdefault
        # records exactly the first character of each category.
        firsts.setdefault(cat, char)
        counts[cat] += 1
    return counts, firsts


def category_scan(desired):

3 Source : categories.py
with MIT License
from fluentpython

def category_scan(desired):
    """Yield, in code-point order, each character whose category equals `desired`."""
    for char in map(chr, range(sys.maxunicode + 1)):
        if category(char) != desired:
            continue
        yield char


def main(args):

3 Source : test_htmlparser.py
with Apache License 2.0
from gethue

    def test_wide_unicode_xml(self):
        # Parses an HTML document whose <p> holds U+26007 and checks that
        # the text comes back as a single character.
        if sys.maxunicode < 1114111:
            return  # skip test
        # The markup must not have spaces after "<", or the tags are not
        # recognised as tags.
        element = self.etree.HTML(_bytes(
            '<html><body><p>\\U00026007</p></body></html>'
        ).decode('unicode_escape'))
        p_text = element.findtext('.//p')
        self.assertEqual(1, len(p_text))
        self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
                         p_text)

    def test_module_HTML_pretty_print(self):

3 Source : test_unicode.py
with Apache License 2.0
from gethue

    def test_wide_unicode_xml(self):
        # Parses a <p> element holding U+26007 and checks that the text
        # comes back as a single character.
        if sys.maxunicode < 1114111:
            return  # skip test
        # The markup must be a well-formed element: no spaces after "<".
        tree = etree.XML(_bytes('<p>\\U00026007</p>').decode('unicode_escape'))
        self.assertEqual(1, len(tree.text))
        self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'),
                         tree.text)

    def test_unicode_xml_broken(self):

3 Source : punctuation.py
with MIT License
from kemingy

    def get_punc_map(self, repl=" "):
        """Return a mapping from every punctuation code point to `repl`.

        The list of code points whose Unicode category starts with "P" is
        computed on first use and stored in `self.punc`; the resulting map is
        cached per `repl` in `self.punc_map`.

        Args:
            repl: replacement value stored for every punctuation code point.
                It may be empty or longer than one character.

        Returns:
            dict mapping each punctuation code point (int) to `repl`.
            Presumably used as a `str.translate` table - verify against
            the caller.
        """
        if not self.punc:
            self.punc = [
                c
                for c in range(sys.maxunicode + 1)
                if unicodedata.category(chr(c)).startswith("P")
            ]
        if repl not in self.punc_map:
            # Zipping with `repl * len(self.punc)` pairs code points with
            # single characters of the repeated string: an empty `repl` gives
            # an empty map and a longer one is split up. fromkeys stores the
            # whole replacement for every key.
            self.punc_map[repl] = dict.fromkeys(self.punc, repl)

        return self.punc_map[repl]

    def remove(self, text, repl=" "):

3 Source : ebcdic_parser.py
with MIT License
from larandvit

    def buildControlCharRegex(self):
        """Return a compiled regex matching any single 'Cc' character.

        The character class is built from every code point from 0 through
        sys.maxunicode (inclusive) whose Unicode category is 'Cc'.
        """
        # create a list of control (unreadable/invisible) characters
        controlChars = ''.join(
            c for c in map(chr, range(sys.maxunicode + 1))
            if unicodedata.category(c) == 'Cc')
        return re.compile('[%s]' % re.escape(controlChars))
    
    def release(self):

3 Source : bleu_tool.py
with MIT License
from LiqunChen0606

    def property_chars(self, prefix):
        """Return all characters whose Unicode category starts with `prefix`.

        Every code point from 0 through sys.maxunicode (inclusive) is
        examined; matches are joined in code-point order.
        """
        #pylint:disable=no-self-use
        # sys.maxunicode is itself a valid code point, so the range is inclusive.
        chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
        return "".join(c for c in chars
                       if unicodedata.category(c).startswith(prefix))


# Module-level UnicodeRegex instance, created once at import time.
uregex = UnicodeRegex()

3 Source : sequence_generator.py
with Apache License 2.0
from mlbench

    def property_chars(self, prefix):
        """Return all characters whose Unicode category starts with `prefix`.

        Every code point from 0 through sys.maxunicode (inclusive) is
        examined; matches are joined in code-point order.
        """
        # sys.maxunicode is itself a valid code point, so the range is inclusive.
        chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
        return "".join(
            c for c in chars
            if unicodedata.category(c).startswith(prefix)
        )


# Module-level UnicodeRegex instance, created once at import time.
uregex = UnicodeRegex()

3 Source : sequence_generator.py
with Apache License 2.0
from mlbench

    def property_chars(self, prefix):
        """Return all characters whose Unicode category starts with `prefix`.

        Every code point from 0 through sys.maxunicode (inclusive) is
        examined; matches are joined in code-point order.
        """
        # sys.maxunicode is itself a valid code point, so the range is inclusive.
        chars = (six.unichr(x) for x in range(sys.maxunicode + 1))
        return "".join(
            c for c in chars
            if unicodedata.category(c).startswith(prefix)
        )


class SequenceGenerator(object):

3 Source : StarFile.py
with MIT License
from pedrobcst

    def set_characterset(self,characterset):
        """Set the characterset for checking datanames: may be `ascii` or `unicode`"""
        import sys
        self.characterset = characterset
        # "(", ")" and "|" need no backslash inside a character class; without
        # it the string literals contain no invalid escape sequences.
        # Any other characterset value leaves self.char_check untouched.
        if characterset == 'ascii':
            self.char_check = re.compile("[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_-]+",re.M)
        elif characterset == 'unicode':
            if sys.maxunicode < 1114111:
                # Code points above U+FFFF cannot appear in the pattern here.
                self.char_check = re.compile(u"[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD-]+",re.M)
            else:
                self.char_check = re.compile(u"[][ \n\r\t!%&()*+,./:<=>?@0-9A-Za-z\\\\^`{}|~\"#$';_\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFD\U00010000-\U0010FFFD-]+",re.M)

    def __str__(self):

3 Source : index_demo.py
with BSD 3-Clause "New" or "Revised" License
from ramalho

def build_index():
    """Build an index.Index mapping each word of a character name to the character.

    Code points from 32 through sys.maxunicode are scanned; characters
    without a name contribute nothing.
    """
    idx = index.Index()
    for code in range(32, sys.maxunicode + 1):
        char = chr(code)
        # An empty default name splits into no words.
        name = unicodedata.name(char, "")
        for word in name.split():
            idx.add(word, char)
    return idx


def find(words):

3 Source : __init__.py
with Apache License 2.0
from Richienb

def unilimit():
    """
    Return sys.maxunicode, The Highest Unicode Value
    """
    highest_code_point = sys.maxunicode
    return highest_code_point


def pyversion(part=None):

3 Source : writer.py
with GNU Affero General Public License v3.0
from singer-io

def test_unicode_characters(quoting: csv = csv.QUOTE_MINIMAL):
    """Write a CSV file with one row per code point below sys.maxunicode.

    The file 'test_unicode_characters.csv' gets a header row followed by
    [character, code point] rows; rows that raise UnicodeEncodeError while
    being written are skipped.
    """
    with open('test_unicode_characters.csv', 'w') as handle:
        csv_writer = csv.writer(handle, quoting=quoting)
        csv_writer.writerow(["unicode character test", "character"])
        for code_point in range(sys.maxunicode):
            row = [chr(code_point), code_point]
            try:
                csv_writer.writerow(row)
            except UnicodeEncodeError:
                continue


def test_bigint_valid_range():

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_get_code_point_range(self):
        # Inputs that yield a range, paired with the expected result.
        accepted = [
            (97, (97, 98)),
            ((97, 100), (97, 100)),
            ([97, 100], [97, 100]),
        ]
        for value, expected in accepted:
            self.assertEqual(get_code_point_range(value), expected)

        # Out-of-range or non-integer inputs yield None.
        rejected = [
            -1,
            sys.maxunicode + 1,
            (-1, 100),
            (97, sys.maxunicode + 2),
            97.0,
            (97.0, 100),
        ]
        for value in rejected:
            self.assertIsNone(get_code_point_range(value))


class TestParseCharacterSubset(unittest.TestCase):

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_creation(self):
        expected = [(0, 9), 11, 12, (14, 32), (33, sys.maxunicode + 1)]
        # Pass a copy so the comparison uses an independent list.
        subset = UnicodeSubset(list(expected))
        self.assertEqual(subset, expected)

        # String specifications are converted to code point ranges.
        for spec, ranges in (('0-9', [(48, 58)]), ('0-9:', [(48, 59)])):
            self.assertEqual(UnicodeSubset(spec), ranges)

        # A subset can be built from another subset.
        letters = UnicodeSubset('a-z')
        duplicate = UnicodeSubset(letters)
        self.assertEqual(duplicate, [(ord('a'), ord('z') + 1)])

    def test_repr(self):

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_repr(self):
        code_points = (ord('2'), ord('\\') + 1)
        self.assertEqual(code_point_repr(code_points), r'2-\\')

        letters = UnicodeSubset('a-z')
        self.assertEqual(repr(letters), "UnicodeSubset('a-z')")
        self.assertEqual(str(letters), "a-z")

        broken = UnicodeSubset((50, 90))
        # Appending a value above sys.maxunicode makes the subset invalid.
        broken.codepoints.append(sys.maxunicode + 10)
        self.assertRaises(ValueError, repr, broken)

    def test_modify(self):

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_complement(self):
        lowercase = [(97, 123)]
        letters = CharacterClass('a-z')
        self.assertListEqual(letters.positive.codepoints, lowercase)
        self.assertListEqual(letters.negative.codepoints, [])

        # Complementing moves the ranges from positive to negative.
        letters.complement()
        self.assertListEqual(letters.positive.codepoints, [])
        self.assertListEqual(letters.negative.codepoints, lowercase)
        self.assertEqual(str(letters), '[^a-z]')

        # The complement of an empty class has sys.maxunicode + 1 members.
        everything = CharacterClass()
        everything.complement()
        self.assertEqual(len(everything), sys.maxunicode + 1)

    def test_isub_operator(self):

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_iterate(self):
        letters = CharacterClass('A-Za-z')
        iterated = ''.join(chr(code) for code in letters)
        self.assertEqual(iterated,
                         string.ascii_uppercase + string.ascii_lowercase)

        # After complementing, iteration yields every other code point.
        letters.complement()
        remaining = ''.join(chr(code) for code in letters)
        self.assertEqual(len(remaining),
                         sys.maxunicode + 1 - len(string.ascii_letters))

    def test_length(self):

3 Source : test_regex.py
with MIT License
from sissaschool

    def test_unicode_categories(self):
        # Entries whose key is longer than one character.
        leaf_sets = [v for k, v in UNICODE_CATEGORIES.items() if len(k) > 1]
        self.assertEqual(sum(len(v) for v in leaf_sets), sys.maxunicode + 1)

        all_sets = list(UNICODE_CATEGORIES.values())
        self.assertEqual(min([min(s) for s in all_sets]), 0)
        self.assertEqual(max([max(s) for s in all_sets]), sys.maxunicode)

        # No two distinct leaf sets may share a code point.
        base_sets = [set(v) for v in leaf_sets]
        overlapping = any(
            s.intersection(t) for s in base_sets for t in base_sets if s != t
        )
        self.assertFalse(overlapping)

    @unittest.skipIf(not ((3, 8)   <  = sys.version_info  <  (3, 9)), "Test only for Python 3.8")

3 Source : unicode.py
with MIT License
from smola

	def unichr(ch):
		"""Return the text for code point `ch`.

		Values up to sys.maxunicode are passed straight to `_unichr`; larger
		values are returned as a two-unit surrogate pair.
		"""
		if ch <= sys.maxunicode:
			return _unichr(ch)
		# Split the offset from 0x10000 into its high and low 10 bits.
		ch -= 0x10000
		high = (ch >> 10) + 0xD800
		low = (ch & ((1 << 10) - 1)) + 0xDC00
		return _unichr(high) + _unichr(low)


def u(s):

3 Source : test_ajit.py
with MIT License
from soIu

    def test_unichar_ord_is_never_signed_on_64bit(self):
        import sys
        # Skipped when sys.maxunicode is 0xffff.
        if sys.maxunicode == 0xffff:
            py.test.skip("test for 32-bit unicodes")
        def f(x):
            return ord(rffi.cast(lltype.UniChar, x))
        res = self.interp_operations(f, [-1])
        # The expected value depends on the platform's sys.maxint.
        expected = -1 if sys.maxint == 2147483647 else 4294967295
        assert res == expected

    def test_issue2200_recursion(self):

3 Source : test_runicode.py
with MIT License
from soIu

def test_unichr():
    assert runicode.UNICHR(0xffff) == u'\uffff'
    if runicode.MAXUNICODE <= 0xffff:
        # 0x10000 is out of range for runicode.
        py.test.raises(ValueError, runicode.UNICHR, 0x10000)
    elif sys.maxunicode < 0x10000:
        # The host needs a surrogate pair to hold the value.
        assert runicode.UNICHR(0x10000) == u'\ud800\udc00'
    else:
        assert runicode.UNICHR(0x10000) == u'\U00010000'
    py.test.raises(TypeError, runicode.UNICHR, 'abc')


def test_ord():

3 Source : test_runicode.py
with MIT License
from soIu

def test_ord():
    for char, expected in (('a', 97), (u'a', 97), (u'\uffff', 0xffff)):
        assert runicode.ORD(char) == expected
    if runicode.MAXUNICODE <= 0xffff:
        # A surrogate pair is rejected when runicode is limited to 0xffff.
        py.test.raises(TypeError, runicode.ORD, u'\ud800\udc00')
    elif sys.maxunicode < 0x10000:
        # The host needs a surrogate pair to hold the value.
        assert runicode.ORD(u'\ud800\udc00') == 0x10000
    else:
        assert runicode.ORD(u'\U00010000') == 0x10000
    py.test.raises(TypeError, runicode.ORD, 'abc')


class UnicodeTests(object):

3 Source : test_runicode.py
with MIT License
from soIu

    def test_random(self):
        # Decode-check 10000 randomly chosen code points below
        # sys.maxunicode in every supported UTF encoding.
        for i in range(10000):
            v = random.randrange(sys.maxunicode)
            # Surrogate code points are skipped.
            if 0xd800 <= v <= 0xdfff:
                continue
            uni = unichr(v)
            if sys.version >= "2.7":
                self.checkdecode(uni, "utf-7")
            for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                             "utf-32 utf-32-be utf-32-le").split():
                self.checkdecode(uni, encoding)

    # Same as above, but uses Hypothesis to generate non-surrogate unicode
    # characters.
    @settings(max_examples=10000)

3 Source : test_runicode.py
with MIT License
from soIu

    def test_maxunicode(self):
        # Decode-check the highest code point in every supported UTF encoding.
        top = unichr(sys.maxunicode)
        encodings = []
        if sys.version >= "2.7":
            encodings.append("utf-7")
        encodings.extend(("utf-8 utf-16 utf-16-be utf-16-le "
                          "utf-32 utf-32-be utf-32-le").split())
        for encoding in encodings:
            self.checkdecode(top, encoding)

    def test_ascii_error(self):

3 Source : test_runicode.py
with MIT License
from soIu

    def test_random(self):
        # Encode-check 10000 randomly chosen code points below
        # sys.maxunicode in every supported UTF encoding.
        for i in range(10000):
            v = random.randrange(sys.maxunicode)
            # Surrogate code points are skipped.
            if 0xd800 <= v <= 0xdfff:
                continue
            uni = unichr(v)
            if sys.version >= "2.7":
                self.checkencode(uni, "utf-7")
            for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
                             "utf-32 utf-32-be utf-32-le").split():
                self.checkencode(uni, encoding)

    def test_maxunicode(self):

3 Source : test_runicode.py
with MIT License
from soIu

    def test_maxunicode(self):
        # Encode-check the highest code point in every supported UTF encoding.
        top = unichr(sys.maxunicode)
        encodings = []
        if sys.version >= "2.7":
            encodings.append("utf-7")
        encodings.extend(("utf-8 utf-16 utf-16-be utf-16-le "
                          "utf-32 utf-32-be utf-32-le").split())
        for encoding in encodings:
            self.checkencode(top, encoding)

    def test_empty(self):

3 Source : test_rutf8.py
with MIT License
from soIu

def _test_check_utf8(s, allow_surrogates):
    # Compare rutf8._check_utf8 with runicode.str_decode_utf_8 on the same
    # input: both must agree on validity, and on the error position or the
    # decoded length.
    try:
        u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True,
                                         allow_surrogates=allow_surrogates)
    except UnicodeDecodeError as e:
        valid = False
    else:
        valid = True
    length = rutf8._check_utf8(s, allow_surrogates, 0, len(s))
    if length >= 0:
        assert valid
        if sys.maxunicode == 0x10FFFF or not _has_surrogates(s):
            assert length == len(u)
    else:
        # A negative result is the bitwise complement of the error position.
        assert not valid
        assert ~(length) == e.start

@given(strategies.characters())

3 Source : test_rutf8.py
with MIT License
from soIu

def test_check_newline_utf8():
    # rutf8.islinebreak on the UTF-8 bytes must agree with the unicodedb
    # answer for every code point below sys.maxunicode.
    for code in xrange(sys.maxunicode):
        expected = runicode.unicodedb.islinebreak(code)
        actual = rutf8.islinebreak(unichr(code).encode('utf8'), 0)
        if expected:
            assert actual
        else:
            assert not actual

def test_isspace_utf8():

See More Examples