How to use the cytoolz.itertoolz.concat function in cytoolz

To help you get started, we've selected a few cytoolz examples based on popular ways the function is used in public projects.


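In case you haven't used it before: concat takes an iterable of iterables and lazily chains them into a single flat stream, one level deep; concatv is the variadic form that takes the sequences as separate arguments. A minimal sketch of both:

from cytoolz.itertoolz import concat, concatv

# concat flattens exactly one level of nesting, lazily
print(list(concat([[1, 2], (3, 4), range(5, 7)])))  # [1, 2, 3, 4, 5, 6]

# concatv takes the sequences as positional arguments instead of one iterable
print(list(concatv([1, 2], [3], [4, 5])))            # [1, 2, 3, 4, 5]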

Example from conda/conda: conda/common/path.py (view on GitHub)
def explode_directories(child_directories, already_split=False):
    # get all directories including parents
    # use already_split=True for the result of get_all_directories()
    maybe_split = lambda x: x if already_split else x.split('/')
    return set(concat(accumulate(join, maybe_split(directory)) for directory in child_directories))
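
The accumulate(join, ...) call produces each directory prefix in turn, and concat then flattens those prefixes across all child directories into one set. A small illustration with made-up paths (assuming join is os.path.join, as in conda's imports, and a POSIX path separator):

from os.path import join
from cytoolz.itertoolz import accumulate, concat

child_directories = ["a/b/c", "a/d"]
# accumulate(join, ["a", "b", "c"]) yields "a", "a/b", "a/b/c"
all_dirs = set(concat(accumulate(join, d.split("/")) for d in child_directories))
print(all_dirs)  # {'a', 'a/b', 'a/b/c', 'a/d'}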

Example from vlukiyanov/pt-sdae: ptsdae/sdae.py (view on GitHub)
        self.embedding_dimension = dimensions[0]
        self.hidden_dimension = dimensions[-1]
        # construct the encoder
        encoder_units = build_units(self.dimensions[:-1], activation)
        encoder_units.extend(
            build_units([self.dimensions[-2], self.dimensions[-1]], None)
        )
        self.encoder = nn.Sequential(*encoder_units)
        # construct the decoder
        decoder_units = build_units(reversed(self.dimensions[1:]), activation)
        decoder_units.extend(
            build_units([self.dimensions[1], self.dimensions[0]], final_activation)
        )
        self.decoder = nn.Sequential(*decoder_units)
        # initialise the weights and biases in the layers
        for layer in concat([self.encoder, self.decoder]):
            weight_init(layer[0].weight, layer[0].bias, gain)
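
Here concat([self.encoder, self.decoder]) simply chains the two nn.Sequential containers so the weight-initialisation loop visits every sub-unit once, much like itertools.chain(self.encoder, self.decoder) would. A torch-free sketch of the iteration pattern (the unit names are stand-ins):

from cytoolz.itertoolz import concat

encoder_units = ["enc_unit_0", "enc_unit_1"]  # stand-ins for the nn.Sequential contents
decoder_units = ["dec_unit_0", "dec_unit_1"]

for layer in concat([encoder_units, decoder_units]):
    print("initialising", layer)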

Example from conda/conda: conda/core/solve.py (view on GitHub)
        prepared_specs = set(concatv(
            specs_to_remove,
            specs_to_add,
            itervalues(specs_from_history_map),
        ))

        index, r = self._prepare(prepared_specs)

        if specs_to_remove:
            # In a previous implementation, we invoked SAT here via `r.remove()` to help with
            # spec removal, and then later invoked SAT again via `r.solve()`. Rather than invoking
            # SAT for spec removal determination, we can use the PrefixGraph and simple tree
            # traversal if we're careful about how we handle features. We still invoke SAT via
            # `r.solve()` later.
            _track_fts_specs = (spec for spec in specs_to_remove if 'track_features' in spec)
            feature_names = set(concat(spec.get_raw_value('track_features')
                                       for spec in _track_fts_specs))
            graph = PrefixGraph((index[dist] for dist in solution), itervalues(specs_map))

            removed_records = []
            for spec in specs_to_remove:
                # If the spec was a track_features spec, then we need to also remove every
                # package with a feature that matches the track_feature. The
                # `graph.remove_spec()` method handles that for us.
                log.trace("using PrefixGraph to remove records for %s", spec)
                removed_records.extend(graph.remove_spec(spec))

            for rec in removed_records:
                # We keep specs (minus the feature part) for the non provides_features packages
                # if they're in the history specs.  Otherwise, we pop them from the specs_map.
                rec_has_a_feature = set(rec.features or ()) & feature_names
                if rec_has_a_feature and rec.name in specs_from_history_map:
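
In the feature-handling code above, set(concat(...)) flattens the per-spec track_features values, each of which may name several features, into a single set of feature names. With hypothetical values it looks like this:

from cytoolz.itertoolz import concat

# hypothetical raw track_features values, one tuple per matching spec
raw_track_features = [("mkl",), ("debug", "nomkl")]
feature_names = set(concat(raw_track_features))
print(feature_names)  # {'mkl', 'debug', 'nomkl'}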

Example from chartbeat-labs/textacy: textacy/extract.py (view on GitHub)
    Yields:
        (:class:`spacy.tokens.Span`, :class:`spacy.tokens.Token`, :class:`spacy.tokens.Span`): next quotation in ``doc``
        represented as a (speaker, reporting verb, quotation) 3-tuple

    Notes:
        Loosely inspired by Krestel, Bergler, Witte. "Minding the Source: Automatic
        Tagging of Reported Speech in Newspaper Articles".

    TODO: Better approach would use ML, but needs a training dataset.
    """
    doc_lang = doc.vocab.lang
    if doc_lang != "en":
        raise NotImplementedError("sorry, English-language texts only :(")
    quote_end_punct = {",", ".", "?", "!"}
    quote_indexes = set(
        itertoolz.concat(
            (m.start(), m.end() - 1)
            for m in re.finditer(r"(\".*?\")|(''.*?'')|(``.*?'')", doc.text)
        )
    )
    quote_positions = list(
        itertoolz.partition(2, sorted(tok.i for tok in doc if tok.idx in quote_indexes))
    )
    sents = list(doc.sents)
    sent_positions = [(sent.start, sent.end) for sent in sents]

    for q0, q1 in quote_positions:
        quote = doc[q0 : q1 + 1]

        # we're only looking for direct quotes, not indirect or mixed
        if not any(char in quote_end_punct for char in quote.text[-4:]):
            continue
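
In this example concat flattens the (start, end - 1) character offsets of every quotation match into one stream, and itertoolz.partition(2, ...) later pairs the corresponding token positions back up into (open, close) spans. The same pattern on a toy string (the text and regex here are purely illustrative):

import re
from cytoolz import itertoolz

text = '"Hello," she said. "Goodbye," he replied.'
offsets = set(
    itertoolz.concat(
        (m.start(), m.end() - 1) for m in re.finditer(r'".*?"', text)
    )
)
pairs = list(itertoolz.partition(2, sorted(offsets)))
print(pairs)  # [(0, 7), (19, 28)]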

Example from chartbeat-labs/textacy: scripts/train_lang_identifier.py (view on GitHub)
"""
    Args:
        seq (Sequence)
        n (int)
        stratify (bool)
        random_state (int)

    Returns:
        list
    """
    random.seed(a=random_state)
    if stratify is True:
        grped = itertoolz.groupby(operator.itemgetter(1), seq)
        n_per_grp = max(int(round(n / len(grped))), 1)
        sample = list(
            itertoolz.concat(
                random.sample(examples, min(len(examples), n_per_grp))
                for examples in grped.values()
            )
        )
        random.shuffle(sample)
        return sample[:n]
    else:
        return random.sample(seq, min(len(seq), n))
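
The stratified branch groups examples by label with itertoolz.groupby, samples from each group, and then flattens the per-group samples back into one list with concat. A compact illustration with invented (text, label) pairs:

import operator
import random
from cytoolz import itertoolz

seq = [("a1", "en"), ("a2", "en"), ("a3", "en"), ("b1", "de"), ("b2", "de")]
grped = itertoolz.groupby(operator.itemgetter(1), seq)  # {'en': [...], 'de': [...]}
n_per_grp = 2
sample = list(
    itertoolz.concat(
        random.sample(examples, min(len(examples), n_per_grp))
        for examples in grped.values()
    )
)
print(sample)  # e.g. [('a3', 'en'), ('a1', 'en'), ('b2', 'de'), ('b1', 'de')]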

Example from conda/conda: conda/common/configuration.py (view on GitHub)
                raise InvalidTypeError(self.name, value, match.source, value.__class__.__name__,
                                       self._type.__name__)

        # get individual lines from important_matches that were marked important
        # these will be prepended to the final result
        def get_marked_lines(match, marker, parameter_obj):
            return tuple(line
                         for line, flag in zip(match.value(parameter_obj),
                                               match.valueflags(parameter_obj))
                         if flag is marker) if match else ()
        top_lines = concat(get_marked_lines(m, ParameterFlag.top, self) for m, _ in
                           relevant_matches_and_values)

        # also get lines that were marked as bottom, but reverse the match order so that lines
        # coming earlier will ultimately be last
        bottom_lines = concat(get_marked_lines(m, ParameterFlag.bottom, self) for m, _ in
                              reversed(relevant_matches_and_values))

        # now, concat all lines, while reversing the matches
        #   reverse because elements closer to the end of search path take precedence
        all_lines = concat(v for _, v in reversed(relevant_matches_and_values))

        # stack top_lines + all_lines, then de-dupe
        top_deduped = tuple(unique(concatv(top_lines, all_lines)))

        # take the top-deduped lines, reverse them, and concat with reversed bottom_lines
        # this gives us the reverse of the order we want, but almost there
        # NOTE: for a line value marked both top and bottom, the bottom marker will win out
        #       for the top marker to win out, we'd need one additional de-dupe step
        bottom_deduped = unique(concatv(reversed(tuple(bottom_lines)), reversed(top_deduped)))
        # just reverse, and we're good to go
        return tuple(reversed(tuple(bottom_deduped)))
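
The behaviour this relies on: concatv chains its arguments in the order given, and unique yields each element only the first time it is seen, so whichever copy of a line comes first in the chained stream wins. A small sketch with hypothetical line values:

from cytoolz.itertoolz import concatv, unique

top_lines = ["channel_a"]
all_lines = ["channel_b", "channel_a", "channel_c"]
# the 'top' copy of channel_a is kept because unique keeps first occurrences
print(tuple(unique(concatv(top_lines, all_lines))))
# ('channel_a', 'channel_b', 'channel_c')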

Example from chartbeat-labs/textacy: textacy/tm/coherence.py (view on GitHub)
                                 if tok.orth_ and not tok.is_stop
                                 and not tok.is_punct if not tok.is_space).intersection(top_terms)
                       for spacy_doc in spacy_docs)
    else:
        raise ValueError()

    # confusing bit of code... it iterates over all sub-documents,
    # concatenating all single and term-pair combinations into one iterable per sub-doc,
    # as well as adding a 1 to a second iterable once per sub-doc.
    # it then splits these two streams, summing the 1s to get the total number of sub-docs
    # and concatenating all sub-docs into a single iterable used to initialize a counter in one go.
    # it's slightly faster than just using a for loop over all subdocs... :shrug:
    ones, sds = zip(*((1, itertoolz.concatv(subdoc, itertools.combinations(sorted(subdoc), 2)))
                    for subdoc in subdocs))
    n_subdocs = sum(ones)
    term_probs = collections.Counter(itertoolz.concat(sds))

    if n_subdocs > 1:
        term_probs = dicttoolz.valmap(lambda x: x / n_subdocs, term_probs,
                                      factory=collections.Counter)

    return term_probs
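
To make the zip/concat trick more concrete, here is the same counting pattern run on two tiny hand-made sub-documents (the term sets are invented):

import collections
import itertools
from cytoolz import itertoolz

subdocs = [{"cat", "dog"}, {"dog", "fish"}]
ones, sds = zip(*((1, itertoolz.concatv(subdoc, itertools.combinations(sorted(subdoc), 2)))
                  for subdoc in subdocs))
n_subdocs = sum(ones)  # 2
term_counts = collections.Counter(itertoolz.concat(sds))
print(term_counts)
# roughly: Counter({'dog': 2, 'cat': 1, 'fish': 1, ('cat', 'dog'): 1, ('dog', 'fish'): 1})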

Example from conda/conda: conda/core/path_actions.py (view on GitHub)
        def files_from_action(link_path_action):
            if isinstance(link_path_action, CompileMultiPycAction):
                return link_path_action.target_short_paths
            else:
                return (link_path_action.target_short_path, )

        def paths_from_action(link_path_action):
            if isinstance(link_path_action, CompileMultiPycAction):
                return link_path_action.prefix_paths_data
            else:
                if link_path_action.prefix_path_data is None:
                    return ()
                else:
                    return (link_path_action.prefix_path_data, )

        files = concat((files_from_action(x) for x in self.all_link_path_actions if x))
        paths_data = PathsData(
            paths_version=1,
            paths=concat((paths_from_action(x) for x in self.all_link_path_actions if x)),
        )

        self.prefix_record = PrefixRecord.from_objects(
            self.package_info.repodata_record,
            self.package_info.index_json_record,
            self.package_info.package_metadata,
            requested_spec=text_type(self.requested_spec),
            paths_data=paths_data,
            files=files,
            link=link,
            url=self.package_info.url,
            extracted_package_dir=extracted_package_dir,
            package_tarball_full_path=package_tarball_full_path,
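
One caveat worth remembering with this last example: concat returns a lazy, single-pass iterator (it behaves like itertools.chain.from_iterable), so the result can only be consumed once; materialise it with list() or tuple() if it needs to be read again. For instance:

from cytoolz.itertoolz import concat

files = concat([["a.py"], ["b.py", "c.py"]])
print(list(files))  # ['a.py', 'b.py', 'c.py']
print(list(files))  # [] -- the iterator is already exhausted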