How to use the forte.pipeline.Pipeline class in forte

To help you get started, we’ve selected a few forte examples, based on popular ways it is used in public projects.
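The common pattern across the examples below: construct a Pipeline, attach exactly one reader with set_reader, chain processors with add, call initialize, and then feed it data. A minimal sketch of that skeleton (StringReader stands in for whichever reader your data source needs, and the commented-out processor line is a placeholder):

from forte.data.data_pack import DataPack
from forte.data.readers import StringReader
from forte.pipeline import Pipeline

pl = Pipeline[DataPack]()      # parameterized by the pack type it produces
pl.set_reader(StringReader())  # exactly one reader turns raw input into packs
# pl.add(SomeProcessor(), config=...)  # processors run in the order added
pl.initialize()                # must be called before any processing

pack = pl.process("Forte pipelines chain a reader with processors.")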


github asyml / forte / examples / ner / main_predict.py
import yaml

from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.pipeline import Pipeline
from forte.data.readers.conll03_reader import CoNLL03Reader
from forte.processors.ner_predictor import CoNLLNERPredictor
from ft.onto.base_ontology import Token, Sentence, EntityMention

config_data = yaml.safe_load(open("config_data.yml", "r"))
config_model = yaml.safe_load(open("config_model.yml", "r"))

config = Config({}, default_hparams=None)
config.add_hparam('config_data', config_data)
config.add_hparam('config_model', config_model)

pl = Pipeline[DataPack]()
pl.set_reader(CoNLL03Reader())
pl.add(CoNLLNERPredictor(), config=config)

pl.initialize()

# Run the pipeline over the test set and print the predictions per sentence.
for pack in pl.process_dataset(config.config_data.test_path):
    for pred_sentence in pack.get_data(
            context_type=Sentence,
            request={
                Token: {"fields": ["ner"]},  # also fetch the predicted "ner" field
                Sentence: [],  # span by default
                EntityMention: {}
            }):
        print("============================")
        print(pred_sentence["context"])       # the sentence text
        print(pred_sentence["Token"]["ner"])  # NER tags aligned with the tokens
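get_data returns plain dictionaries keyed by entry type. If you prefer working with entry objects instead, an equivalent iteration with DataPack.get looks roughly like this (a sketch using the same Sentence and Token types; token.ner is the field the predictor fills in):

for pack in pl.process_dataset(config.config_data.test_path):
    for sentence in pack.get(Sentence):
        print(sentence.text)
        # NER tags of the tokens covered by this sentence.
        print([token.ner for token in pack.get(Token, sentence)])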
github asyml / forte / examples / pipelines / process_string_example.py
# Import paths below follow the layout used in forte's other examples.
from forte.data.data_pack import DataPack
from forte.data.readers import StringReader
from forte.pipeline import Pipeline
from forte.processors.ner_predictor import CoNLLNERPredictor
from forte.processors.nltk_processors import (
    NLTKPOSTagger, NLTKSentenceSegmenter, NLTKWordTokenizer)
from forte.processors.srl_predictor import SRLPredictor


def main():
    # `config` is loaded from YAML earlier in the example file (elided here).
    pl = Pipeline[DataPack]()
    pl.set_reader(StringReader())
    pl.add(NLTKSentenceSegmenter())
    pl.add(NLTKWordTokenizer())
    pl.add(NLTKPOSTagger())

    pl.add(CoNLLNERPredictor(), config=config.NER)
    pl.add(SRLPredictor(), config=config.SRL)

    pl.initialize()

    text = (
        "So I was excited to see Journey to the Far Side of the Sun finally "
        "get released on an affordable DVD (the previous print had been "
        "fetching $100 on eBay - I'm sure those people wish they had their "
        "money back - but more about that in a second).")

    # Run the whole pipeline on a single string.
    pack = pl.process(text)
github asyml / forte / examples / serialization / serialize_example.py
    coref_pl.add(ExampleCorefCounter())

    coref_pl.add(
        MultiPackWriter(),
        config={
            'output_dir': output_path,
            'indent': 2,
            'overwrite': True,
        }
    )

    coref_pl.run(input_path)

    print("We can then load the saved results, and see if everything is OK. "
          "We should see the same number of multi packs there. ")
    reading_pl = Pipeline()
    reading_pl.set_reader(MultiPackDiskReader(), {'data_path': output_path})
    reading_pl.add(ExampleCorefCounter())
    reading_pl.run()
github asyml / forte / examples / passage_ranker / create_index.py
import argparse
import logging
import os

import yaml

from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.data.readers import MSMarcoPassageReader
from forte.pipeline import Pipeline
from forte.processors.ir import ElasticSearchIndexProcessor

logging.basicConfig(level=logging.INFO)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", default="./config.yml",
                        help="Config YAML filepath")
    args = parser.parse_args()

    config = yaml.safe_load(open(args.config_file, "r"))
    config = Config(config, default_hparams=None)

    nlp: Pipeline[DataPack] = Pipeline()
    nlp.set_reader(MSMarcoPassageReader())
    nlp.add(ElasticSearchIndexProcessor(), config=config.create_index)
    nlp.initialize()

    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             config.data.relative_path)

    # Report progress every 10,000 packs.
    for idx, pack in enumerate(nlp.process_dataset(data_path)):
        if (idx + 1) % 10000 == 0:
            print(f"Indexed {idx + 1} packs")
github asyml / forte / examples / serialization / serialize_example.py
"""
    This example reads data from input path, and write multi pack output
    to output path.

    Args:
        input_path:
        output_path:

    Returns:

    """
    print("Multi Pack serialization example.")

    print("We first read the data, and add multi-packs to them, and then "
          "save the results.")
    coref_pl = Pipeline()
    coref_pl.set_reader(DirPackReader())
    coref_pl.add(MultiPackBoxer())
    coref_pl.add(PackCopier())
    coref_pl.add(ExampleCoreferencer())
    coref_pl.add(ExampleCorefCounter())

    coref_pl.add(
        MultiPackWriter(),
        config={
            'output_dir': output_path,
            'indent': 2,
            'overwrite': True,
        }
    )

    coref_pl.run(input_path)
github asyml / forte / forte / train_pipeline.py
def prepare(self):
    # Run every configured preprocessor over the training data
    # before the actual training starts.
    prepare_pl = Pipeline()
    prepare_pl.set_reader(self.train_reader)
    for p in self.preprocessors:
        prepare_pl.add(p)
    prepare_pl.run(self.configs.config_data.train_path)