How to use the quantulum3.regex function in quantulum3

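To help you get started, we’ve selected a few quantulum3 examples based on popular ways the library is used in public projects. Each snippet below is an excerpt and may begin or end mid-function; in these excerpts the regex module appears under the alias `reg` (for example, `reg.units_regex(lang)`).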

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: nielstron/quantulum3 — quantulum3/parser.py (view on GitHub)
operator_index = group_operators[index - 2]
                        # Remove (original length - new end) characters
                        unit_shortening = item.end() - item.start(operator_index)
                        _LOGGER.debug(
                            "Because operator inconsistency, cut from "
                            "operator: '{}', new surface: {}".format(
                                operator,
                                text[item.start() : item.end() - unit_shortening],
                            )
                        )
                        break

            # Determine whether a negative power has to be applied to following
            # units
            if operator and not slash:
                slash = any(i in operator for i in reg.division_operators(lang))
            # Determine which unit follows
            if unit:
                unit_surface, power = parse_unit(item, unit, slash, lang)
                base = dis.disambiguate_unit(unit_surface, text, lang)
                derived += [{"base": base, "power": power, "surface": unit_surface}]

        unit = get_unit_from_dimensions(derived, text, lang)

    _LOGGER.debug("\tUnit: %s", unit)
    _LOGGER.debug("\tEntity: %s", unit.entity)

    return unit, unit_shortening
Source: nielstron/quantulum3 — quantulum3/parser.py (view on GitHub)
logging.basicConfig(format=log_format)

    if verbose:  # pragma: no cover
        prev_level = logging.root.getEffectiveLevel()
        logging.root.setLevel(logging.DEBUG)
        _LOGGER.debug("Verbose mode")

    orig_text = text
    _LOGGER.debug('Original text: "%s"', orig_text)

    text = clean_text(text, lang)
    values = extract_spellout_values(text, lang)
    text, shifts = substitute_values(text, values)

    quantities = []
    for item in reg.units_regex(lang).finditer(text):

        groups = dict([i for i in item.groupdict().items() if i[1] and i[1].strip()])
        _LOGGER.debug(u"Quantity found: %s", groups)

        try:
            uncert, values = get_values(item, lang)

            unit, unit_shortening = get_unit(item, text)
            surface, span = get_surface(shifts, orig_text, item, text, unit_shortening)
            objs = build_quantity(
                orig_text, text, item, values, unit, surface, span, uncert, lang
            )
            if objs is not None:
                quantities += objs
        except ValueError as err:
            _LOGGER.debug("Could not parse quantity: %s", err)
Source: nielstron/quantulum3 — quantulum3/parser.py (view on GitHub)
item_units = [item.group(i) for i in group_units if item.group(i)]

    if len(item_units) == 0:
        unit = load.units(lang).names["dimensionless"]
    else:
        derived, slash = [], False
        multiplication_operator = False
        for index in range(0, 5):
            unit = item.group(group_units[index])
            operator_index = None if index < 1 else group_operators[index - 1]
            operator = None if index < 1 else item.group(operator_index)

            # disallow spaces as operators in units expressed in their symbols
            # Enforce consistency among multiplication and division operators
            # Single exceptions are colloquial number abbreviations (5k miles)
            if operator in reg.multiplication_operators(lang) or (
                operator is None
                and unit
                and not (index == 1 and unit in reg.suffixes(lang))
            ):
                if multiplication_operator != operator and not (
                    index == 1 and str(operator).isspace()
                ):
                    if multiplication_operator is False:
                        multiplication_operator = operator
                    else:
                        # Cut if inconsistent multiplication operator
                        # treat the None operator differently - remove the
                        # whole word of it
                        if operator is None:
                            # For this, use the last consistent operator
                            # (before the current) with a space
Source: nielstron/quantulum3 — quantulum3/parser.py (view on GitHub)
def resolve_exponents(value, lang="en_US"):
    """Resolve unusual exponents (like 2^4) and return substituted string and
       factor

    Params:
        value: str, string with only one value
    Returns:
        str, string with basis and exponent removed
        array of float, factors for multiplication

    """
    factors = []
    matches = re.finditer(
        reg.number_pattern_groups(lang), value, re.IGNORECASE | re.VERBOSE
    )
    for item in matches:
        if item.group("base") and item.group("exponent"):
            base = item.group("base")
            exp = item.group("exponent")
            if base in ["e", "E"]:
                # already handled by float
                factors.append(1)
                continue
                # exp = '10'
            # Expect that in a pure decimal base,
            # either ^ or superscript notation is used
            if re.match(r"\d+\^?", base):
                if not (
                    "^" in base
                    or re.match(r"[%s]" % reg.unicode_superscript_regex(), exp)
Source: nielstron/quantulum3 — quantulum3/parser.py (view on GitHub)
exp = item.group("exponent")
            if base in ["e", "E"]:
                # already handled by float
                factors.append(1)
                continue
                # exp = '10'
            # Expect that in a pure decimal base,
            # either ^ or superscript notation is used
            if re.match(r"\d+\^?", base):
                if not (
                    "^" in base
                    or re.match(r"[%s]" % reg.unicode_superscript_regex(), exp)
                ):
                    factors.append(1)
                    continue
            for superscript, substitute in reg.unicode_superscript().items():
                exp.replace(superscript, substitute)
            exp = float(exp)
            base = float(base.replace("^", ""))
            factor = base ** exp
            stripped = str(value).replace(item.group("scale"), "")
            value = stripped
            factors.append(factor)
            _LOGGER.debug(
                "Replaced {} by factor {}".format(item.group("scale"), factor)
            )
        else:
            factors.append(1)
            continue
    return value, factors
Source: nielstron/quantulum3 — quantulum3/_lang/en_US/parser.py (view on GitHub)
"""
    Parse surface and power from unit text.
    """

    surface = unit.replace(".", "")
    power = re.findall(r"-?[0-9%s]+" % reg.unicode_superscript_regex(), surface)
    power_written = re.findall(r"\b(%s)\b" % "|".join(reg.powers(lang)), surface)

    if power:
        power = [
            reg.unicode_superscript()[i] if i in reg.unicode_superscript() else i
            for i in power
        ]
        power = "".join(power)
        new_power = -1 * int(power) if slash else int(power)
        surface = re.sub(r"\^?-?[0-9%s]+" % reg.unicode_superscript(), "", surface)

    elif power_written:
        exponent = reg.powers(lang)[power_written[0]]
        new_power = -exponent if slash else exponent
        surface = re.sub(r"\b%s\b" % power_written[0], "", surface).strip()

    else:
        new_power = -1 if slash else 1

    return surface, new_power