How to use the forte.data.datasets.wikipedia.db_utils.get_resource_name function in forte

To help you get started, we’ve selected a few Forte examples that show popular ways the `get_resource_name` function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github asyml / forte / forte / data / datasets / wikipedia / dbpedia_infobox_reader.py View on Github external
def add_info_boxes(pack: DataPack, info_box_statements: List):
    """Build one ``WikiInfoBoxMapped`` entry per info box statement.

    Args:
        pack: Data pack handed to each ``WikiInfoBoxMapped`` constructor.
        info_box_statements: Statements that each unpack into three items.
            The first item is ignored, the second must offer ``toPython()``
            and supplies the entry key, and the third is resolved through
            ``get_resource_name`` to supply the entry value.
    """
    for _subject, predicate, obj in info_box_statements:
        # The entry is constructed before its fields are computed.
        mapped_entry = WikiInfoBoxMapped(pack)
        mapped_entry.key = predicate.toPython()
        mapped_entry.value = get_resource_name(obj)
github asyml / forte / forte / data / datasets / wikipedia / dbpedia_infobox_reader.py View on Github external
def add_property(pack: DataPack, statements: List):
    """Build one ``WikiInfoBoxProperty`` entry per statement.

    Args:
        pack: Data pack handed to each ``WikiInfoBoxProperty`` constructor.
        statements: Statements that each unpack into three items. The first
            item is ignored, the second must offer ``toPython()`` and
            supplies the entry key, and the third is resolved through
            ``get_resource_name`` to supply the entry value.
    """
    for _subject, predicate, obj in statements:
        # Resolve both fields first, then create and fill the entry.
        key, value = predicate.toPython(), get_resource_name(obj)
        property_entry = WikiInfoBoxProperty(pack)
        property_entry.key = key
        property_entry.value = value
github asyml / forte / forte / data / datasets / wikipedia / dbpedia_based_reader.py View on Github external
def add_info_boxes(pack: DataPack, info_box_statements: List):
    """Record every info box statement as a ``WikiInfoBoxMapped`` entry.

    Args:
        pack: Data pack handed to each ``WikiInfoBoxMapped`` constructor.
        info_box_statements: Statements that each unpack into three items.
            The first item is ignored, the second must offer ``toPython()``
            and supplies the entry key, and the third is resolved through
            ``get_resource_name`` to supply the entry value.
    """
    for statement in info_box_statements:
        _subject, predicate, obj = statement

        # Key and value are resolved before the entry object is created.
        entry_key = predicate.toPython()
        entry_value = get_resource_name(obj)

        mapped_entry = WikiInfoBoxMapped(pack)
        mapped_entry.key = entry_key
        mapped_entry.value = entry_value
github asyml / forte / forte / data / datasets / wikipedia / dbpedia_based_reader.py View on Github external
logging.info("Provided anchor end is %d, "
                         "clipped to fit with the text.", end)
            end = len(pack.text)

        if end <= begin:
            logging.info("Provided anchor [%d:%d is invalid.]", begin, end)
            continue

        anchor = WikiAnchor(pack, begin, end)
        for info_key, info_value in link_infos.items():
            if info_key == 'type':
                anchor_type = get_resource_fragment(info_value)
                if not anchor_type == 'Phrase' and not anchor_type == 'Word':
                    logging.warning("Unknown anchor type: %s", info_value)
            if info_key == 'taIdentRef':
                target_page_name = get_resource_name(info_value)
                if target_page_name in redirects:
                    target_page_name = redirects[target_page_name]
                anchor.target_page_name = target_page_name
github asyml / forte / forte / data / datasets / wikipedia / dbpedia_based_reader.py View on Github external
def _collect(self, nif_context: str  # type: ignore
                 ) -> Iterator[Tuple[Dict[str, str],
                                     Dict[str, List[state_type]]]]:
        """Yield the text and related statements for each context found.

        Every statement produced by ``NIFParser(nif_context)`` is inspected.
        A result is yielded when the subject's ``nif`` attribute is
        ``"context"`` and the predicate's fragment is ``'isString'``.

        Args:
            nif_context: Input handed to ``NIFParser`` (presumably the path
                of the NIF context file -- confirm against the caller).

        Yields:
            A pair ``(str_data, node_data)``. ``str_data`` holds ``'text'``,
            ``'doc_name'`` and ``'oldid'``; ``node_data`` holds ``'struct'``
            and ``'links'``, looked up from ``self.struct_reader`` and
            ``self.link_reader`` for the statement's context. Each yielded
            pair consists of newly created dicts, so callers may keep them.
        """
        for context_statements in NIFParser(nif_context):
            for s, v, o, c in context_statements:
                nif_type = get_resource_attribute(s, "nif")
                print_progress(f'Collecting DBpedia context: [{c.identifier}]')

                # Only the "isString" statement of a context carries the text.
                if nif_type != "context":
                    continue
                if get_resource_fragment(v) != 'isString':
                    continue

                # Build fresh dicts for every result: reusing a single pair
                # across yields would make all previously yielded results
                # alias (and silently change to) the latest context's data.
                str_data: Dict[str, str] = {
                    'text': o.toPython(),
                    'doc_name': get_resource_name(s),
                    'oldid': get_resource_attribute(c.identifier, 'oldid'),
                }
                node_data: Dict[str, List[state_type]] = {
                    'struct': self.struct_reader.get(c),
                    'links': self.link_reader.get(c),
                }

                yield str_data, node_data
        print(' ..Done')
github asyml / forte / forte / data / datasets / wikipedia / dbpedia_infobox_reader.py View on Github external
def _collect(self, info_box_raw: str  # type: ignore
                 ) -> Iterator[Tuple[str, Dict[str, List[state_type]]]]:
        """Yield a resource name and its info box statements per context.

        Args:
            info_box_raw: Input handed to ``ContextGroupedNIFReader``
                (presumably the path of the raw info box file -- confirm
                against the caller).

        Yields:
            A pair of the name resolved by ``get_resource_name`` from the
            first item of the first statement in the group, and a dict with
            the group's statements under ``'properties'`` plus the results
            of ``self.literal_info_reader`` and ``self.object_info_reader``
            for the same context under ``'literals'`` and ``'objects'``.
        """
        for context, grouped in ContextGroupedNIFReader(info_box_raw):
            # The name comes from the first element of the first statement.
            first_statement = grouped[0]
            resource_name = get_resource_name(first_statement[0])

            info_box_data = {
                'properties': grouped,
                'literals': self.literal_info_reader.get(context),
                'objects': self.object_info_reader.get(context),
            }
            yield resource_name, info_box_data