How to use the funcy.first function in funcy

To help you get started, we’ve selected a few funcy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kensho-technologies / bubs / bubs / helpers.py View on Github external
def _reverse_inputs_and_indices(encoded_sentence_forward, output_index_list_forward):
    """Reverse sequence of character codes and list of output indices.

    Args:
        encoded_sentence_forward: sequence of character codes; when non-trivial it is
            expected to be wrapped in special start/end sentinel values.
        output_index_list_forward: list of output indices for the forward direction.

    NOTE(review): this block appears truncated in this excerpt -- the backward
    output indices are computed below but no return statement is visible here;
    confirm against the original module.
    """
    if len(encoded_sentence_forward) >= 2:  # sentence should at least have start, end characters
        start_sentence_value = first(encoded_sentence_forward)
        end_sentence_value = last(encoded_sentence_forward)
        encoded_sentence_length = len(encoded_sentence_forward)

        # Reverse all character codes in the sentence without affecting the first and last elements
        # (those are special start_sentence_value and end_sentence_value)
        encoded_sentence_back = [start_sentence_value]
        encoded_sentence_back.extend(encoded_sentence_forward[-2:0:-1])  # skip start and end
        encoded_sentence_back.append(end_sentence_value)
    else:
        # Too short to contain real content between the sentinels.
        encoded_sentence_back = []

    # compute backward output indices: each token's "location before" is the
    # previous token's forward index (0 for the first token)
    if len(output_index_list_forward) == 0:
        locations_before_tokens = []
    else:
        locations_before_tokens = [0] + output_index_list_forward[:-1]
github idrdex / star-django / tags / annotate_core.py View on Github external
def update_canonical(canonical_pk):
    """Recompute a canonical SeriesAnnotation from its active raw annotations.

    Locks the canonical row (select_for_update), picks the best source raw
    annotation, syncs its samples onto the canonical record, and refreshes
    the aggregate statistics before saving.
    """
    canonical = SeriesAnnotation.objects.select_for_update().get(pk=canonical_pk)
    raw_annos = canonical.raw_annotations.prefetch_related('sample_annotations') \
                                         .filter(is_active=True).order_by('pk')
    # Disable the canonical annotation when it has no active raw sources.
    canonical.is_active = bool(raw_annos)

    kappa_values = [anno.best_kappa for anno in raw_annos if anno.best_kappa]
    best_cohens_kappa = max(kappa_values) if kappa_values else None

    # Choose the source annotation by priority:
    #   1. one that agrees with another annotation,
    #   2. one achieving the best (strictly positive) Cohen's kappa,
    #   3. any raw annotation at all.
    source = None
    for anno in raw_annos:
        if anno.agrees_with_id:
            source = anno
            break
    if source is None:
        for anno in raw_annos:
            if anno.best_kappa == best_cohens_kappa \
                    and anno.best_kappa is not None and anno.best_kappa > 0:
                source = anno
                break
    if source is None:
        source = first(raw_annos)

    # Rebuild canonical sample annotations only when they diverge from the source.
    if source and not is_samples_concordant(canonical, source):
        canonical.sample_annotations.all().delete()
        sample_pairs = [(s.sample_id, s.annotation)
                        for s in source.sample_annotations.all()]
        canonical.fill_samples(sample_pairs)

    # Copy descriptive fields from the chosen source.
    if source:
        canonical.column = source.column
        canonical.regex = source.regex

    # Calculate fleiss kappa for all existing annotations/validations.
    if raw_annos:
        canonical.fleiss_kappa = _fleiss_kappa(
            [anno.sample_annotations.all() for anno in raw_annos])
    else:
        canonical.fleiss_kappa = None
    canonical.best_cohens_kappa = best_cohens_kappa
    canonical.annotations = raw_annos.count()
    canonical.authors = len({anno.created_by_id for anno in raw_annos})
    canonical.save()
github harrystech / arthur-redshift-etl / python / etl / design / load.py View on Github external
# NOTE(review): the enclosing `def` line for this validation routine is not
# visible in this excerpt -- the indented body below evidently belongs to a
# function taking `table_design`; confirm against the original module.
validate_semantics_of_table_or_ctas(table_design)

    # Upstream (source) tables must not declare dependencies of their own.
    if "depends_on" in table_design:
        raise TableDesignSemanticError("upstream table '%s' has dependencies listed" % table_design["name"])

    # Flatten the list of constraint dicts into the constraint-type keys they carry.
    constraints = table_design.get("constraints", [])
    constraint_types_in_design = [constraint_type for constraint in constraints for constraint_type in constraint]
    for constraint_type in constraint_types_in_design:
        # Key constraints only make sense for derived tables, not upstream sources.
        if constraint_type in ("natural_key", "surrogate_key"):
            raise TableDesignSemanticError(
                "upstream table '%s' has unexpected %s constraint" % (table_design["name"], constraint_type)
            )

    # Destructure the (at most single-element) split_by list; None means "not set".
    [split_by_name] = table_design.get("extract_settings", {}).get("split_by", [None])
    if split_by_name:
        # NOTE(review): fy.first returns None when no column matches split_by_name,
        # in which case the .get() below raises TypeError -- presumably upstream
        # schema validation guarantees the column exists; confirm.
        split_by_column = fy.first(fy.where(table_design["columns"], name=split_by_name))
        if split_by_column.get("skipped", False):
            raise TableDesignSemanticError("split-by column must not be skipped")
        if not split_by_column.get("not_null", False):
            raise TableDesignSemanticError("split-by column must have not-null constraint")
        if split_by_column["type"] not in ("int", "long", "date", "timestamp"):
            raise TableDesignSemanticError(
                "type of split-by column must be int, long, date or timestamp, not '{}'".format(split_by_column["type"])
            )
github kensho-technologies / bubs / bubs / helpers.py View on Github external
token_lists: list of list of str. sentences and tokens in document.
        document: str. raw text of predicted document

    Returns:
        document_span_lists: A list of lists of tuples of int (start, end)
    """
    # NOTE(review): the function signature and docstring opening are not visible
    # in this excerpt; the code below maps per-sentence token spans onto
    # character offsets in the original document.
    # Each sentence's length is taken as the end offset of its last span.
    sentence_lengths = [last(span_list)[-1] for span_list in span_lists]
    sentence_starts = []
    offset = 0

    # We have to base our location off of the original document to deal with weird sentences.
    # For example: "Yuliya loves cats.    Ray loves dogs." or the case where a sentence is
    # split mid-word due to exceeding the maximum sentence length.
    # We select the first length, and the second sentence and so on to get the offsets.
    for length, token_list in zip(sentence_lengths, token_lists):
        # Locate this sentence's first token, searching only past the end of
        # the previous sentence so repeated tokens do not confuse the search.
        next_start = document[offset:].find(first(token_list))
        offset = offset + next_start
        sentence_starts.append(offset)
        offset = offset + length

    # Modify our sentence indices so that the sentences line up with the original text
    document_span_lists = []
    for start, span_list in zip(sentence_starts, span_lists):
        document_span_lists.append(
            [[span_start + start, span_end + start] for (span_start, span_end) in span_list]
        )

    return document_span_lists
github kensho-technologies / graphql-compiler / graphql_compiler / compiler / helpers.py View on Github external
def get_only_element_from_collection(one_element_collection):
    """Assert that the collection has exactly one element, then return that element.

    Args:
        one_element_collection: any sized iterable (list, tuple, set, ...).

    Returns:
        the single element contained in the collection.

    Raises:
        AssertionError: if the collection does not contain exactly one element.
    """
    if len(one_element_collection) != 1:
        raise AssertionError(
            u"Expected a collection with exactly one element, but got: {}".format(
                one_element_collection
            )
        )
    # next(iter(...)) works for any iterable (including sets); no third-party
    # helper (funcy.first) is needed for this trivial operation.
    return next(iter(one_element_collection))
github kensho-technologies / bubs / bubs / helpers.py View on Github external
def _shift_spans_to_start_at_zero(spans):
    """Shift all spans in the sentence by the same amount so the first token starts at zero.

    Args:
        spans: list of lists of character-level spans, one span per token, one list per sentence

    Returns:
        list of list of spans shifted so that first token in each sentence starts at zero
    """
    adjusted_spans = []
    for span_list in spans:
        if len(span_list) > 0:
            offset = first(span_list)[0]
            adjusted_spans.append([(span[0] - offset, span[1] - offset) for span in span_list])
        else:
            adjusted_spans.append([])
    return adjusted_spans
github SteemData / steemdata-mongo / src / methods.py View on Github external
def account_from_auths():
        # NOTE(review): `op` is not defined in this excerpt -- presumably a
        # closure variable from an enclosing function (the def/body indentation
        # also looks mangled by extraction); confirm against the original module.
        # Returns the first entry of op['required_auths'], falling back to
        # op['required_posting_auths'] only when 'required_auths' is absent
        # (an empty 'required_auths' list yields None, not the fallback).
        return first(op.get('required_auths', op.get('required_posting_auths')))
github harrystech / arthur-redshift-etl / python / etl / extract / sqoop.py View on Github external
def build_sqoop_partition_options(
        self, relation: RelationDescription, partition_key: Optional[str], table_size: int
    ) -> List[str]:
        """
        Build the partitioning-related arguments for Sqoop.

        Returns ["--split-by", ..., "--num-mappers", N] when the relation can be
        split across multiple mappers, and ["--num-mappers", "1"] otherwise.
        """
        if not partition_key:
            # Without a partition key there is nothing to split on.
            return ["--num-mappers", "1"]

        column = fy.first(fy.where(relation.table_design["columns"], name=partition_key))
        if column["type"] in ("date", "timestamp"):
            # Date-like keys must be cast to an integral epoch value for splitting.
            quoted_key_arg = """CAST(DATE_PART('epoch', "{}") AS BIGINT)""".format(partition_key)
        else:
            quoted_key_arg = '"{}"'.format(partition_key)

        if relation.num_partitions:
            # num_partitions explicitly set in the design file overrides the dynamic determination.
            num_mappers = min(relation.num_partitions, self.max_partitions)
        else:
            num_mappers = self.maximize_partitions(table_size)

        if num_mappers > 1:
            return ["--split-by", quoted_key_arg, "--num-mappers", str(num_mappers)]

        # The partitioner produced a single partition, so one mapper suffices.
        return ["--num-mappers", "1"]