How to use quantulum3 - 10 common examples

To help you get started, we’ve selected a few quantulum3 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
unit = l.NAMES[item['unit']]
                except KeyError:
                    try:
                        entity = item['entity']
                    except KeyError:
                        print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    if entity == 'unknown':
                        derived = [{
                            'base': l.NAMES[i['base']].entity.name,
                            'power': i['power']
                        } for i in item['dimensions']]
                        entity = c.Entity(name='unknown', dimensions=derived)
                    elif entity in l.ENTITIES:
                        entity = l.ENTITIES[entity]
                    else:
                        print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    unit = c.Unit(
                        name=item['unit'],
                        dimensions=item['dimensions'],
                        entity=entity)
                try:
                    span = next(
                        re.finditer(re.escape(item['surface']),
                                    test['req'])).span()
                except StopIteration:
                    print('Surface mismatch for "%s"' % test['req'])
                    return
                uncert = None
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    if entity == 'unknown':
                        derived = [{
                            'base': l.NAMES[i['base']].entity.name,
                            'power': i['power']
                        } for i in item['dimensions']]
                        entity = c.Entity(name='unknown', dimensions=derived)
                    elif entity in l.ENTITIES:
                        entity = l.ENTITIES[entity]
                    else:
                        print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    unit = c.Unit(
                        name=item['unit'],
                        dimensions=item['dimensions'],
                        entity=entity)
                try:
                    span = next(
                        re.finditer(re.escape(item['surface']),
                                    test['req'])).span()
                except StopIteration:
                    print('Surface mismatch for "%s"' % test['req'])
                    return
                uncert = None
                if 'uncertainty' in item:
                    uncert = item['uncertainty']
                res.append(
                    c.Quantity(
                        value=item['value'],
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
for item in test['res']:
                try:
                    unit = l.NAMES[item['unit']]
                except KeyError:
                    try:
                        entity = item['entity']
                    except KeyError:
                        print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    if entity == 'unknown':
                        derived = [{
                            'base': l.NAMES[i['base']].entity.name,
                            'power': i['power']
                        } for i in item['dimensions']]
                        entity = c.Entity(name='unknown', dimensions=derived)
                    elif entity in l.ENTITIES:
                        entity = l.ENTITIES[entity]
                    else:
                        print(('Could not find %s, provide "derived" and'
                               ' "entity"' % item['unit']))
                        return
                    unit = c.Unit(
                        name=item['unit'],
                        dimensions=item['dimensions'],
                        entity=entity)
                try:
                    span = next(
                        re.finditer(re.escape(item['surface']),
                                    test['req'])).span()
                except StopIteration:
                    print('Surface mismatch for "%s"' % test['req'])
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
unit = c.Unit(
                        name=item['unit'],
                        dimensions=item['dimensions'],
                        entity=entity)
                try:
                    span = next(
                        re.finditer(re.escape(item['surface']),
                                    test['req'])).span()
                except StopIteration:
                    print('Surface mismatch for "%s"' % test['req'])
                    return
                uncert = None
                if 'uncertainty' in item:
                    uncert = item['uncertainty']
                res.append(
                    c.Quantity(
                        value=item['value'],
                        unit=unit,
                        surface=item['surface'],
                        span=span,
                        uncertainty=uncert))
            test['res'] = [i for i in res]

        return tests
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
def test_parse_classifier(self):
        """
        Parse every loaded test request with the classifier switched on and
        check the parsed quantities against the expected ones.
        """
        cases = load_tests(False) + load_tests(True)
        # The classifier is forced on for this test regardless of its
        # previous setting.
        clf.USE_CLF = True
        # Requests are processed in order of increasing length.
        for case in sorted(cases, key=lambda entry: len(entry['req'])):
            parsed = p.parse(case['req'])
            expected = case['res']
            # Failure message: attribute dicts of parsed vs. expected.
            failure_msg = "{} \n {}".format(
                [quantity.__dict__ for quantity in parsed],
                [quantity.__dict__ for quantity in expected])
            self.assertEqual(parsed, expected, failure_msg)
github nielstron / quantulum3 / quantulum3 / tests.py View on Github external
def test_training(self):
        """Run classifier training once per flag value: False, then True."""
        # TODO - update test to not overwrite existing clf.pickle and
        # wiki.json files.
        for flag in (False, True):
            clf.train_classifier(flag)
github nielstron / quantulum3 / quantulum3 / parser.py View on Github external
def get_unit(item, text, lang="en_US"):
    """
    Extract unit from regex hit.
    """

    group_units = ["prefix", "unit1", "unit2", "unit3", "unit4"]
    group_operators = ["operator1", "operator2", "operator3", "operator4"]
    # How much of the end is removed because of an "incorrect" regex match
    unit_shortening = 0

    item_units = [item.group(i) for i in group_units if item.group(i)]

    if len(item_units) == 0:
        unit = load.units(lang).names["dimensionless"]
    else:
        derived, slash = [], False
        multiplication_operator = False
        for index in range(0, 5):
            unit = item.group(group_units[index])
            operator_index = None if index < 1 else group_operators[index - 1]
            operator = None if index < 1 else item.group(operator_index)

            # disallow spaces as operators in units expressed in their symbols
            # Enforce consistency among multiplication and division operators
            # Single exceptions are colloquial number abbreviations (5k miles)
            if operator in reg.multiplication_operators(lang) or (
                operator is None
                and unit
                and not (index == 1 and unit in reg.suffixes(lang))
            ):
github nielstron / quantulum3 / quantulum3 / classifier.py View on Github external
def disambiguate_unit(unit, text, lang="en_US"):
    """
    Resolve ambiguity between units with same names, symbols or abbreviations.
    """

    new_unit = (
        load.units(lang).symbols.get(unit)
        or load.units(lang).surfaces.get(unit)
        or load.units(lang).surfaces_lower.get(unit.lower())
        or load.units(lang).symbols_lower.get(unit.lower())
    )
    if not new_unit:
        raise KeyError('Could not find unit "%s" from "%s"' % (unit, text))

    if len(new_unit) > 1:
        transformed = classifier(lang).tfidf_model.transform([clean_text(text, lang)])
        scores = classifier(lang).classifier.predict_proba(transformed).tolist()[0]
        scores = zip(scores, classifier(lang).target_names)

        # Filter for possible names
        names = [i.name for i in new_unit]
        scores = [i for i in scores if i[1] in names]

        # Sort by rank
github nielstron / quantulum3 / quantulum3 / disambiguate.py View on Github external
def disambiguate_unit(unit_surface, text, lang="en_US"):
    """
    Resolve ambiguity between units with same names, symbols or abbreviations.

    :param unit_surface: surface form of the unit to look up
    :param text: text handed to the disambiguation routine as context
    :param lang: language code forwarded to the unit tables and classifier
    :returns (str) unit name of the resolved unit, or "unk" when the
        non-classifier lookup finds no candidate
    """
    if clf.USE_CLF:
        return clf.disambiguate_unit(unit_surface, text, lang).name

    units = load.units(lang)
    lowered = unit_surface.lower()
    # Use .get() so that a miss in one table falls through to the next one
    # instead of raising KeyError (plain mapping) or inserting an empty entry
    # into the shared table (auto-populating mapping). Lookup order: exact
    # symbol, exact surface, lower-cased surface, lower-cased symbol.
    candidates = (
        units.symbols.get(unit_surface)
        or units.surfaces.get(unit_surface)
        or units.surfaces_lower.get(lowered)
        or units.symbols_lower.get(lowered)
    )
    if not candidates:
        return "unk"

    if len(candidates) > 1:
        resolved = no_clf.disambiguate_no_classifier(candidates, text, lang)
    else:
        resolved = next(iter(candidates))

    # The resolver may come back empty-handed; report that as unknown too.
    return resolved.name if resolved else "unk"
github nielstron / quantulum3 / quantulum3 / no_classifier.py View on Github external
def disambiguate_no_classifier(entities, text, lang="en_US"):
    """
    Disambiguate the entity or unit without a classifier
    :param entities:
    :param text:
    :param lang:
    :return: a single entity or unit that has been chosen for
    """
    word_sets = load.training_set(lang)

    max_entity, max_count, max_relative = None, 0, 0
    for entity in entities:
        count = 0
        total = 0
        for word_set in word_sets:
            if word_set["unit"] == entity.name:
                total += len(word_set["text"])
                for word in word_set["text"].split(" "):
                    count += 1 if word in text else 0
        try:
            relative = count / total
        except ZeroDivisionError:
            relative = 0
        if relative > max_relative or (relative == max_relative and count > max_count):
            max_entity, max_count, max_relative = entity, count, relative