How to use the quantulum3.classifier module in quantulum3

To help you get started, we’ve selected a few quantulum3 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
def test_parse_classifier(self):
        """Check parser output for every loaded test case with the classifier on.

        The cases returned by ``load_tests(False)`` and ``load_tests(True)``
        are combined and processed in order of increasing request length.
        Each parse result must equal the expected result stored under the
        case's ``'res'`` key; on mismatch the attribute dicts of both sides
        are included in the failure message.
        """
        cases = load_tests(False) + load_tests(True)
        # Force the classifier on, regardless of its current setting.
        clf.USE_CLF = True

        def request_length(case):
            return len(case['req'])

        for case in sorted(cases, key=request_length):
            expected = case['res']
            actual = p.parse(case['req'])
            failure_msg = "{} \n {}".format(
                [quant.__dict__ for quant in actual],
                [quant.__dict__ for quant in expected])
            self.assertEqual(actual, expected, failure_msg)
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
def test_training(self):
        """Run ``clf.train_classifier`` once with ``False`` and once with ``True``."""
        # TODO - update test to not overwrite existing clf.pickle and wiki.json files.
        for flag in (False, True):
            clf.train_classifier(flag)
github nielstron / quantulum3 / quantulum3 / disambiguate.py View on Github external
def disambiguate_unit(unit_surface, text, lang="en_US"):
    """
    Resolve ambiguity between units with same names, symbols or abbreviations.

    :param unit_surface: surface string of the unit; used as the lookup key
        (both as given and lower-cased)
    :param text: forwarded unchanged to the disambiguation helpers
        (presumably the surrounding text -- confirm against callers)
    :param lang: language identifier forwarded to the loaders and helpers
    :returns (str) unit name of the resolved unit
    """
    if clf.USE_CLF:
        # Classifier enabled: delegate and keep only the resolved unit's name.
        base = clf.disambiguate_unit(unit_surface, text, lang).name
    else:
        # Candidate lookup: the first truthy result wins, tried in this order:
        # exact symbol, exact surface, lower-cased surface, lower-cased symbol.
        # NOTE(review): this chain relies on a missing key producing an empty
        # (falsy) value instead of raising KeyError -- confirm the lookup
        # tables behave that way.
        base = (
            load.units(lang).symbols[unit_surface]
            or load.units(lang).surfaces[unit_surface]
            or load.units(lang).surfaces_lower[unit_surface.lower()]
            or load.units(lang).symbols_lower[unit_surface.lower()]
        )

        if len(base) > 1:
            # Several candidates: hand the decision to the no-classifier helper.
            base = no_clf.disambiguate_no_classifier(base, text, lang)
        elif len(base) == 1:
            # Exactly one candidate: unwrap it from its container.
            base = next(iter(base))

        # Reduce a resolved unit to its name; a falsy result is left untouched
        # by the code visible here.
        if base:
            base = base.name
github nielstron / quantulum3 / scripts / extract_vere.py View on Github external
def glove_via_magnitude(topn=200,
                        min_similarity=None,
                        filename='glove.6B.100d.magnitude'):
    """
    Query a Magnitude vector file for neighbours of ambiguous units' surfaces.

    :param topn: forwarded to ``most_similar`` as ``topn``
    :param min_similarity: forwarded to ``most_similar`` as ``min_similarity``
    :param filename: vector file name, joined onto ``TOPDIR``
    """

    # Imported inside the function, so pymagnitude is only required when this
    # function is actually called.
    from pymagnitude import Magnitude

    v = Magnitude(os.path.join(TOPDIR, filename))
    training_set = list()
    # Gather the distinct units found in the second element of every entry
    # yielded by classifier.ambiguous_units().
    units = set()
    for unit_list in classifier.ambiguous_units():
        for unit in unit_list[1]:
            units.add(unit)
    for unit in units:
        print('Processing {}...'.format(unit.name))

        name = unit.name
        # NOTE(review): if unit.name is a str, set(unit.name) yields a set of
        # its individual characters, not a one-element set holding the name;
        # {unit.name} may have been intended -- confirm.
        surfaces = set(unit.name)
        # Only classes.Unit instances contribute their surfaces and symbols.
        if isinstance(unit, classes.Unit):
            surfaces.update(unit.surfaces)
            surfaces.update(unit.symbols)
        for surface in surfaces:
            # Look up the vector for this surface and ask for its neighbours.
            neighbours = v.most_similar(
                v.query(surface), topn=topn, min_similarity=min_similarity)
            training_set.append({
                'unit':
                name,
github nielstron / quantulum3 / quantulum3 / disambiguate.py View on Github external
def disambiguate_entity(key, text, lang="en_US"):
    """
    Resolve ambiguity between entities with same dimensionality.

    With ``clf.USE_CLF`` set, the decision is delegated to
    ``clf.disambiguate_entity``. Otherwise the candidates stored under
    ``key`` in ``load.entities().derived`` are inspected: a single candidate
    is returned directly, several candidates are narrowed down by
    ``no_clf.disambiguate_no_classifier`` and the winner is looked up in
    ``load.entities().names``.

    :returns the resolved entity, or None when there is no candidate or a
        KeyError / StopIteration occurs during resolution
    """
    try:
        if clf.USE_CLF:
            return clf.disambiguate_entity(key, text, lang)

        candidates = load.entities().derived[key]
        count = len(candidates)
        if count == 1:
            return next(iter(candidates))
        if count > 1:
            chosen = no_clf.disambiguate_no_classifier(candidates, text, lang)
            return load.entities().names[chosen]
        return None
    except (KeyError, StopIteration):
        # Unknown key or failed lookup: report "no entity" instead of raising.
        return None
github nielstron / quantulum3 / scripts / extract_vere.py View on Github external
def glove_via_magnitude(
    topn=500, min_similarity=None, filename="glove.6B.100d.magnitude", lang="en_US"
):

    from pymagnitude import Magnitude

    v = Magnitude(os.path.join(TOPDIR, filename))
    training_set = list()
    units = set()
    for unit_list in classifier.ambiguous_units():
        for unit in unit_list[1]:
            units.add(unit)
    for unit in units:
        print("Processing {}...".format(unit.name))

        name = unit.name
        surfaces = set(unit.name)
        if isinstance(unit, classes.Unit):
            surfaces.update(unit.surfaces)
            surfaces.update(unit.symbols)
        for surface in surfaces:
            neighbours = v.most_similar(
                v.query(surface), topn=topn, min_similarity=min_similarity
            )
            training_set.append(
                {