How to use the ftfy.bad_codecs.utf8_variants.IncrementalDecoder class in ftfy

To help you get started, we’ve selected a few ftfy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github LuminosoInsight / python-ftfy / tests / test_bytes.py View on Github external
def test_incomplete_sequences():
    """
    Check that incremental decoding is independent of where the input is cut.

    The input contains a six-byte CESU-8 surrogate pair and a two-byte
    Java-style null. It is fed to a fresh decoder in two pieces, once for
    every possible byte offset, and the concatenated output must always equal
    the expected string.
    """
    test_bytes = b'surrogates: \xed\xa0\x80\xed\xb0\x80 / null: \xc0\x80'
    test_string = 'surrogates: \U00010000 / null: \x00'

    # The split point indexes into the *bytes*, so it must range over the byte
    # length. The decoded string is shorter than the byte string; bounding the
    # loop by its length would never split inside the trailing b'\xc0\x80'.
    for split_point in range(len(test_bytes) + 1):
        left = test_bytes[:split_point]
        right = test_bytes[split_point:]

        decoder = IncrementalDecoder()
        got = decoder.decode(left, final=False)
        # The second piece ends the stream, so ask the decoder to flush
        # whatever it is still buffering.
        got += decoder.decode(right, final=True)
        eq_(got, test_string)
github LuminosoInsight / python-ftfy / ftfy / bad_codecs / sloppy_utf8.py View on Github external
own will be displayed as a slightly ugly ellipsis instead of a replacement
  character.)

Aside from these cases, it acts the same as the "utf-8-variants" decoder.
Encoding with "sloppy-utf-8" is the same as encoding with "utf-8".
"""
from __future__ import unicode_literals
import codecs
from ftfy.bad_codecs.utf8_variants import (
    IncrementalEncoder, IncrementalDecoder,
    UTF8IncrementalDecoder
)
NAME = 'sloppy-utf-8'


class SloppyIncrementalDecoder(IncrementalDecoder):
    """
    An incremental decoder that extends the "utf-8-variants"
    IncrementalDecoder by overriding its `_buffer_decode_step` hook.
    """
    def _buffer_decode_step(self, input, errors, final):
        """
        There are three possibilities for each decoding step:

        - Decode as much apparently-real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        When decoding "apparently-real UTF-8", we might get an error,
        and that's where the sloppiness kicks in. If the error is something
        we recognize and can fix, we'll fix it.

        NOTE(review): `input`, `errors` and `final` presumably follow the
        convention of `codecs.BufferedIncrementalDecoder._buffer_decode`
        (bytes to decode, error-handling scheme name, end-of-stream flag) --
        confirm against the base class.
        """
        # Get a reference to the superclass method that we'll be using for
        # most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode
github LuminosoInsight / python-ftfy / ftfy / bad_codecs / utf8_variants.py View on Github external
class StreamWriter(codecs.StreamWriter):
    """Stream writer whose `encode` is the `encode` callable already in scope."""
    # The right-hand name is resolved in the enclosing scope at class-creation
    # time, so the existing `encode` callable becomes this class's `encode`.
    encode = encode


class StreamReader(codecs.StreamReader):
    """Stream reader whose `decode` is the `decode` callable already in scope."""
    # The right-hand name is resolved in the enclosing scope at class-creation
    # time, so the existing `decode` callable becomes this class's `decode`.
    decode = decode


# Bundle every entry point of this codec -- the stateless encode/decode
# callables, the incremental encoder/decoder classes, and the stream
# reader/writer classes -- into a single `codecs.CodecInfo` record under the
# name NAME.
# NOTE(review): presumably handed back by a codec search function registered
# elsewhere -- confirm where CODEC_INFO is consumed.
CODEC_INFO = codecs.CodecInfo(
    name=NAME,
    encode=encode,
    decode=decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
)
github LuminosoInsight / python-ftfy / ftfy / bad_codecs / utf8_variants.py View on Github external
class StreamWriter(codecs.StreamWriter):
    """Stream writer whose `encode` is the `encode` callable already in scope."""
    # The right-hand name is resolved in the enclosing scope at class-creation
    # time, so the existing `encode` callable becomes this class's `encode`.
    encode = encode


class StreamReader(codecs.StreamReader):
    """Stream reader whose `decode` is the `decode` callable already in scope."""
    # The right-hand name is resolved in the enclosing scope at class-creation
    # time, so the existing `decode` callable becomes this class's `decode`.
    decode = decode


# Bundle every entry point of this codec -- the stateless encode/decode
# callables, the incremental encoder/decoder classes, and the stream
# reader/writer classes -- into a single `codecs.CodecInfo` record under the
# name NAME.
# NOTE(review): presumably handed back by a codec search function registered
# elsewhere -- confirm where CODEC_INFO is consumed.
CODEC_INFO = codecs.CodecInfo(
    name=NAME,
    encode=encode,
    decode=decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
)
github LuminosoInsight / python-ftfy / ftfy / bad_codecs / utf8_variants.py View on Github external
def decode(input, errors='strict'):
    """
    Decode `input` in one shot and report how much of it was consumed.

    A throwaway IncrementalDecoder is created with the given `errors` scheme
    and asked to decode the whole input as the final chunk of its stream.

    Returns a `(decoded_text, length_of_input)` tuple.
    """
    one_shot = IncrementalDecoder(errors)
    text = one_shot.decode(input, final=True)
    return text, len(input)
github LuminosoInsight / python-ftfy / ftfy / bad_codecs / utf8_variants.py View on Github external
def decode(input, errors='strict'):
    """
    Decode `input` in one shot and report how much of it was consumed.

    A throwaway IncrementalDecoder is created with the given `errors` scheme
    and asked to decode the whole input.

    Returns a `(decoded_text, length_of_input)` tuple.
    """
    # This is a stateless decode: the decoder is discarded right after the
    # call, so the input must be treated as the end of the stream. Without
    # final=True, a truncated trailing sequence would stay in the decoder's
    # buffer and be dropped silently, while len(input) would still be
    # reported as fully consumed.
    return IncrementalDecoder(errors).decode(input, final=True), len(input)