How to use blingfire - 8 common examples

To help you get started, we’ve selected a few blingfire examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github microsoft / BlingFire / ldbsrc / bert_base_tok / test_bling.py View on Github external
# Load the BERT base tokenizer model that sits next to the installed blingfire
# package. Note one model can be used by multiple threads within the same process.
# (To load a model from the working directory instead, pass "./bert_base_tok.bin".)
h = blingfire.load_model(os.path.join(os.path.dirname(blingfire.__file__), "bert_base_tok.bin"))

try:
    # Echo every stdin line, followed by the token ids computed for it.
    for line in sys.stdin:
        line = line.strip()
        print(line)

        # Per blingfire's text_to_ids signature: 128 is the maximum number of
        # ids returned, 100 is the id substituted for unknown tokens.
        ids = blingfire.text_to_ids(h, line, 128, 100)
        print(ids)
finally:
    # Always release the model handle, even if reading or tokenizing fails.
    blingfire.free_model(h)
github microsoft / BlingFire / ldbsrc / bert_base_tok / test_bling.py View on Github external
import sys
import os
import blingfire
# from blingfiretok import *


# Load the BERT base tokenizer model that sits next to the installed blingfire
# package. Note one model can be used by multiple threads within the same process.
# (To load a model from the working directory instead, pass "./bert_base_tok.bin".)
h = blingfire.load_model(os.path.join(os.path.dirname(blingfire.__file__), "bert_base_tok.bin"))

try:
    # Echo every stdin line, followed by the token ids computed for it.
    for line in sys.stdin:
        line = line.strip()
        print(line)

        # Per blingfire's text_to_ids signature: 128 is the maximum number of
        # ids returned, 100 is the id substituted for unknown tokens.
        ids = blingfire.text_to_ids(h, line, 128, 100)
        print(ids)
finally:
    # Always release the model handle, even if reading or tokenizing fails.
    blingfire.free_model(h)
github microsoft / BlingFire / ldbsrc / bert_base_tok / test_bling.py View on Github external
# from blingfiretok import *


# Load the BERT base tokenizer model that sits next to the installed blingfire
# package. Note one model can be used by multiple threads within the same process.
# (To load a model from the working directory instead, pass "./bert_base_tok.bin".)
h = blingfire.load_model(os.path.join(os.path.dirname(blingfire.__file__), "bert_base_tok.bin"))

try:
    # Echo every stdin line, followed by the token ids computed for it.
    for line in sys.stdin:
        line = line.strip()
        print(line)

        # Per blingfire's text_to_ids signature: 128 is the maximum number of
        # ids returned, 100 is the id substituted for unknown tokens.
        ids = blingfire.text_to_ids(h, line, 128, 100)
        print(ids)
finally:
    # Always release the model handle, even if reading or tokenizing fails.
    blingfire.free_model(h)
github soskek / bookcorpus / make_sentlines.py View on Github external
def convert_into_sentences(lines):
    """Group *lines* into paragraphs and split each paragraph into sentences.

    A whitespace-only chunk ends the paragraph collected so far. The sentences
    of each paragraph closed this way are appended to the result, followed by
    a lone newline string as a paragraph marker. A paragraph still open when
    the input ends is appended without a trailing marker.

    Returns a ``(sentences, count)`` tuple; ``count`` is the number of
    sentences and does not include the paragraph markers.
    """
    def split_paragraph(parts):
        # Join the stripped chunks into one line of text before segmenting.
        joined = " ".join(parts).strip().replace('\n', ' ')
        return text_to_sentences(joined).split('\n')

    output = []
    total = 0
    pending = []
    for chunk in lines:
        stripped = chunk.strip()
        if stripped:
            pending.append(stripped)
        elif pending:
            # Blank chunk after content: close the current paragraph.
            found = split_paragraph(pending)
            output += found
            total += len(found)
            output.append('\n')
            pending = []

    # Flush a final paragraph that was not terminated by a blank chunk.
    if pending:
        found = split_paragraph(pending)
        output += found
        total += len(found)
    return output, total
github nipunsadvilkar / pySBD / benchmarks / genia_benchmark.py View on Github external
def blingfire_tokenize(text):
    """Return the sentences BlingFire finds in *text* as a list of strings."""
    # text_to_sentences yields one string; break it apart on newlines.
    segmented = blingfire.text_to_sentences(text)
    return segmented.split('\n')
github nipunsadvilkar / pySBD / benchmarks / benchmark.py View on Github external
def blingfire_tokenize(text):
    """Return the sentences BlingFire finds in *text* as a list of strings."""
    # text_to_sentences yields one string; break it apart on newlines.
    segmented = blingfire.text_to_sentences(text)
    return segmented.split('\n')
github sgraaf / Replicate-Toronto-BookCorpus / src / utils.py View on Github external
def text2sentences(text: str) -> str:
    """Split *text* into sentences, one sentence per output line.

    Runs of consecutive non-empty lines are treated as one paragraph: the
    lines are stripped, joined with single spaces and passed to
    ``text_to_sentences``. Whitespace-only lines separate paragraphs.

    Args:
        text: The raw text to segment.

    Returns:
        The sentences joined with newlines, or an empty string when *text*
        contains no non-blank lines.
    """
    stack = []
    sentences = []

    for line in text.splitlines():
        line = line.strip()
        if line:
            stack.append(line)
        elif stack:  # empty line and non-empty stack
            sentences += text_to_sentences(' '.join(stack)).splitlines()
            stack = []

    # Flush the last paragraph: without this, text that does not end with a
    # blank line would silently lose its final paragraph.
    if stack:
        sentences += text_to_sentences(' '.join(stack)).splitlines()

    return '\n'.join(sentences)
github soskek / bookcorpus / tokenize_sentlines.py View on Github external
import sys
from blingfire import text_to_words


# Tokenize stdin line by line; blank lines are passed through as empty lines.
for raw_line in sys.stdin:
    stripped = raw_line.strip()
    if not stripped:
        print('')
        continue
    print(text_to_words(stripped))

blingfire

Python wrapper for a lightning-fast, finite-state-machine-based NLP library.

MIT
Latest version published 3 years ago

Package Health Score

60 / 100
Full package analysis

Similar packages