How to use the srsly.json_loads function in srsly

To help you get started, we’ve selected a few srsly.json_loads examples based on popular ways it is used in public projects.

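srsly.json_loads deserializes a JSON string, or UTF-8 bytes, into a Python object; it is the counterpart of srsly.json_dumps. A minimal example before the real-world snippets:

import srsly

record = srsly.json_loads('{"text": "hello", "answer": "accept"}')
assert record["answer"] == "accept"

# bytes input (e.g. lines read from a binary file handle) also works
record = srsly.json_loads(b'{"text": "hello"}')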

From explosion/spaCy: spacy/cli/converters/jsonl2json.py
def ner_jsonl2json(input_data, lang=None, n_sents=10, use_morphology=False, **_):
    if lang is None:
        raise ValueError("No --lang specified, but tokenization required")
    json_docs = []
    input_examples = [srsly.json_loads(line) for line in input_data.strip().split("\n")]
    nlp = get_lang_class(lang)()
    sentencizer = nlp.create_pipe("sentencizer")
    for i, batch in enumerate(minibatch(input_examples, size=n_sents)):
        docs = []
        for record in batch:
            raw_text = record["text"]
            if "entities" in record:
                ents = record["entities"]
            else:
                ents = record["spans"]
            ents = [(e["start"], e["end"], e["label"]) for e in ents]
            doc = nlp.make_doc(raw_text)
            sentencizer(doc)
            spans = [doc.char_span(s, e, label=L) for s, e, L in ents]
            doc.ents = _cleanup_spans(spans)
            docs.append(doc)
        json_docs.append(docs_to_json(docs, id=i))
    return json_docs
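Each line of input_data is expected to be one standalone JSON record with a "text" field and character-offset entities under either "entities" or "spans". A hypothetical input line (the values are illustrative):

{"text": "Apple is based in Cupertino.", "spans": [{"start": 0, "end": 5, "label": "ORG"}, {"start": 18, "end": 27, "label": "GPE"}]}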
From explosion/prodigy-recipes: contrib/phrases/phrases.py
"--label is a required argument",
            "This is the label that will be assigned to all patterns "
            "created from terms collected in this dataset. ",
            exits=1,
            error=True,
        )

    DB = connect()

    def get_pattern(term, label):
        return {"label": label, "pattern": [{"lower": t.lower()} for t in term["text"].split()]}

    log("RECIPE: Starting recipe terms.to-patterns", locals())
    if dataset is None:
        log("RECIPE: Reading input terms from sys.stdin")
        terms = (srsly.json_loads(line) for line in sys.stdin)
    else:
        if dataset not in DB:
            prints("Can't find dataset '{}'".format(dataset), exits=1, error=True)
        terms = DB.get_dataset(dataset)
        log(
            "RECIPE: Reading {} input terms from dataset {}".format(len(terms), dataset)
        )
    if output_file:
        patterns = [
            get_pattern(term, label) for term in terms if term["answer"] == "accept"
        ]
        log("RECIPE: Generated {} patterns".format(len(patterns)))
        srsly.write_jsonl(output_file, patterns)
        prints("Exported {} patterns".format(len(patterns)), output_file)
    else:
        log("RECIPE: Outputting patterns")
From explosion/spaCy: spacy/cli/profile.py
def _read_inputs(loc, msg):
    if loc == "-":
        msg.info("Reading input from sys.stdin")
        file_ = sys.stdin
        file_ = (line.encode("utf8") for line in file_)
    else:
        input_path = Path(loc)
        if not input_path.exists() or not input_path.is_file():
            msg.fail("Not a valid input data file", loc, exits=1)
        msg.info("Using data from {}".format(input_path.parts[-1]))
        file_ = input_path.open()
    for line in file_:
        data = srsly.json_loads(line)
        text = data["text"]
        yield text
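When the whole input is a JSONL file, srsly's higher-level srsly.read_jsonl helper covers the same line-by-line json_loads loop; a minimal sketch (the path is a placeholder):

import srsly

for data in srsly.read_jsonl("inputs.jsonl"):
    print(data["text"])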
From explosion/spaCy: spacy/language.py
        deserializers["meta.json"] = lambda b: self.meta.update(srsly.json_loads(b))
        deserializers["vocab"] = lambda b: self.vocab.from_bytes(
From explosion/spaCy: bin/load_reddit.py
    def __iter__(self):
        for file_path in self.iter_files():
            with bz2.open(str(file_path)) as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    comment = srsly.json_loads(line)
                    if self.is_valid(comment):
                        text = self.strip_tags(comment["body"])
                        yield {"text": text}