How to use the cytoolz.compose function in cytoolz

To help you get started, we've selected a few examples of cytoolz.compose, based on popular ways it is used in public projects.

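cytoolz.compose chains functions so that they run right to left: compose(f, g)(x) is f(g(x)). A minimal sketch:

from cytoolz import compose

# the rightmost function runs first, then each one to its left
stringify_inc = compose(str, lambda x: x + 1)
assert stringify_inc(41) == "42"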

From semanticize/semanticizest (test_wikidump.py):
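This test builds a first_link helper by composing four steps; reading right to left, extract_links parses the markup, iter and next pull out the first link, and tuple normalizes it for comparison.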
from cytoolz import compose
from nose.tools import assert_equal

# extract_links is the project's wiki-markup link parser (exact import path assumed)
from semanticizest.wikidump import extract_links


def test_extract_links():
    first_link = compose(tuple, next, iter, extract_links)

    assert_equal(first_link("[[foo|bar]]"), ("foo", "bar"))
    assert_equal(first_link("[[foo]]"), ("foo", "foo"))
    assert_equal(first_link("[[File:picture!]] [[foo]]"), ("foo", "foo"))
    assert_equal(first_link("[[foo]]bar."), ("foo", "foobar"))
    assert_equal(first_link("[[baz|foobar]];"), ("baz", "foobar"))

    # This construct appears in enwiki for chemical formulae etc., but also in
    # nlwiki (and dewiki?) for more general compound nouns. The current
    # handling may not be exactly what we want; any fix should update the test
    # accordingly.
    assert_equal(list(extract_links("[[Lithium|Li]][[Fluorine|F]]")),
                 [("Lithium", "Li"), ("Fluorine", "F")])
    assert_equal(list(extract_links("[[tera-|tera]][[becquerel]]s")),
                 [("tera-", "tera"), ("becquerel", "becquerels")])
From eea/eea.corpus (src/eea.corpus/eea/corpus/utils.py):
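Here compose chains generator functions into a single token-normalization pipeline. The snippet opens partway through handle_numbers; handle_slash is defined elsewhere in the same module.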
def handle_numbers(words):
    # (snippet opened mid-function; def and for lines reconstructed)
    # numeric tokens are preceded by a "*number*" marker
    for word in words:
        if word.isnumeric():
            yield "*number*"
        yield word


def lower_words(words):
    yield from (w.lower() for w in words)


def filter_small_words(words):
    for w in words:
        if len(w) > 2:
            yield w


# handle_slash is defined elsewhere in the same module
handle_text = compose(filter_small_words, lower_words, handle_numbers,
                      handle_slash)


def tokenizer(text):
    """ Tokenize text. Return a list of tokens (words).
    """
    ignore_chars = "()*:\"><][#\n\t'^%?=&"

    for c in ignore_chars:
        text = text.replace(c, ' ')
    words = text.split(' ')

    return list(handle_text(words))
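Because every stage is a generator, compose yields one lazy pipeline over the words. A quick check using just the two fully shown stages:

from cytoolz import compose

pipeline = compose(filter_small_words, lower_words)
print(list(pipeline(["Foo", "AB", "Bar"])))  # ['foo', 'bar']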
From ChenRocks/fast_abs_rl (make_extraction_labels.py):
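In this extraction-label script, compose(list, _split_words) builds a tokenizer that splits each text and materializes the result as a list.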
import json
from os.path import join
from time import time
from datetime import timedelta

from cytoolz import compose

# DATA_DIR, DUMP_DIR, count_data, get_extract_label and _split_words
# come from the project's own modules


def label(split):
    start = time()
    print('start processing {} split...'.format(split))
    data_dir = join(DATA_DIR, split)
    dump_dir = join(DUMP_DIR, split)
    n_data = count_data(data_dir)
    tokenize = compose(list, _split_words)  # build the tokenizer once, not per file
    for i in range(n_data):
        print('processing {}/{} ({:.2f}%)\r'.format(i, n_data, 100*i/n_data),
              end='')
        with open(join(data_dir, '{}.json'.format(i))) as f:
            data = json.loads(f.read())
        art_sents = tokenize(data['article'])
        abs_sents = tokenize(data['abstract'])
        extracted, scores = get_extract_label(art_sents, abs_sents)
        data['extracted'] = extracted
        data['score'] = scores
        with open(join(dump_dir, '{}.json'.format(i)), 'w') as f:
            json.dump(data, f, indent=4)
    print('finished in {}'.format(timedelta(seconds=time()-start)))
From thautwarm/Rem (remlang/compiler/module.py):
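This one-liner (shown out of context) wraps an arbitrary function so that its return value is post-processed by the interpreter's env['to_chinese'] converter.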
# (fragment) wrap origin_func so its result is converted by env['to_chinese']
lambda origin_func: compose(env['to_chinese'], origin_func)
From ethereum/trinity (trinity/protocol/eth/managers.py):
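In Trinity's GetBlockBodies handler, compose(keccak, rlp.encode) builds a per-item hasher: each body's uncles are RLP-encoded, then keccak-hashed, with the map evaluated in an executor.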
    async def _normalize_response(self,
                                  response: Tuple[BlockBody, ...]) -> BlockBodyBundles:
        if not isinstance(response, tuple):
            raise MalformedMessage(
                "`GetBlockBodies` response must be a tuple. Got: {0}".format(type(response))
            )
        elif not all(isinstance(item, BlockBody) for item in response):
            raise MalformedMessage("`GetBlockBodies` response must be a tuple of block bodies")

        uncles_hashes = await self._run_in_executor(
            tuple,
            map(compose(keccak, rlp.encode), tuple(body.uncles for body in response)),
        )
        transaction_roots_and_trie_data = await self._run_in_executor(
            tuple,
            map(make_trie_root_and_nodes, tuple(body.transactions for body in response)),
        )

        body_bundles = tuple(zip(response, transaction_roots_and_trie_data, uncles_hashes))
        return body_bundles
From ethereum/web3.py (web3/utils/signing.py):
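web3.py composes its byte helpers: zpad_bytes(32) evidently returns a padding function, so to_bytes32 converts a value to bytes and left-pads it to 32 bytes.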
def to_eth_v(v_raw, chain_id=None):
    if chain_id is None:
        v = v_raw + V_OFFSET
    else:
        v = v_raw + CHAIN_ID_OFFSET + 2 * chain_id
    return v


def sign_transaction_hash(account, transaction_hash, chain_id):
    signature = account.sign_msg_hash(transaction_hash)
    (v_raw, r, s) = signature.vrs
    v = to_eth_v(v_raw, chain_id)
    return (v, r, s)


to_bytes32 = compose(zpad_bytes(32), to_bytes)


def sign_message_hash(key, msg_hash):
    signature = key.sign_msg_hash(msg_hash)
    (v_raw, r, s) = signature.vrs
    v = to_eth_v(v_raw)
    eth_signature_bytes = to_bytes32(r) + to_bytes32(s) + to_bytes(v)
    return (v, r, s, eth_signature_bytes)


class LocalAccount(object):
    '''
    Collection of convenience methods on a private key, roughly using the
    same API as web3.js: https://web3js.readthedocs.io/en/1.0/web3-eth-accounts.html#create
    '''
    def __init__(self, key, account):
        ...  # (snippet truncated here)
def build_batchers(data_dir, net_type, word2id, cuda, debug):
    assert net_type in ['ff', 'rnn']
    prepro = prepro_fn_extract(args.max_word, args.max_sent)

    def sort_key(sample):
        src_sents, _ = sample
        return len(src_sents)

    batchify_fn = (batchify_fn_extract_ff if net_type == 'ff'
                   else batchify_fn_extract_ptr)
    convert_batch = (convert_batch_extract_ff if net_type == 'ff'
                     else convert_batch_extract_ptr)
    batchify = compose(batchify_fn(PAD, cuda=cuda), convert_batch(UNK, word2id))

    train_loader = DataLoader(
        ExtractDataset('train', data_dir), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract)
    val_loader = DataLoader(
        ExtractDataset('val', data_dir), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract)

    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=False)
    val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
                                    single_run=True, fork=False)
    return train_batcher, val_batcher
From ethereum/web3.py (web3/account.py):
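hashMessage builds its recovery hasher as a three-step pipeline: wrap the message bytes in the Ethereum signature envelope, keccak-hash the result, and return the digest as HexBytes.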
def hashMessage(data=None, hexstr=None, text=None):
    message_bytes = to_bytes(data, hexstr=hexstr, text=text)
    recovery_hasher = compose(HexBytes, keccak, signature_wrapper)
    return recovery_hasher(message_bytes)
From maszhongming/Effective_Extractive_Summarization (main.py):
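A close variant of the batcher builder above: compose pairs the pointer-network batchify function with batch conversion, the conversion step running first.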
def build_batchers(decoder, emb_type, word2id, cuda, debug):
    prepro = prepro_fn_extract(args.max_word, args.max_sent, emb_type)
    def sort_key(sample):
        src_sents, _ = sample
        return len(src_sents)
    batchify_fn = batchify_fn_extract_ptr
    convert_batch = convert_batch_extract_ptr
    batchify = compose(batchify_fn(PAD, cuda=cuda),
                       convert_batch(UNK, word2id, emb_type))

    train_loader = DataLoader(
        ExtractDataset('train'), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract
    )
    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=not debug)

    val_loader = DataLoader(
        ExtractDataset('val'), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract
    )
From ChenRocks/fast_abs_rl (train_abstractor.py):
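The abstractor trainer follows the same pattern with the copy-mechanism helpers: convert_batch_copy runs first, then batchify_fn_copy pads and tensorizes.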
def build_batchers(word2id, cuda, debug):
    prepro = prepro_fn(args.max_art, args.max_abs)
    def sort_key(sample):
        src, target = sample
        return (len(target), len(src))
    batchify = compose(
        batchify_fn_copy(PAD, START, END, cuda=cuda),
        convert_batch_copy(UNK, word2id)
    )

    train_loader = DataLoader(
        MatchDataset('train'), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn
    )
    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=not debug)

    val_loader = DataLoader(
        MatchDataset('val'), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn
    )
    # (completed from the parallel build_batchers above; the snippet was truncated)
    val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
                                    single_run=True, fork=not debug)

    return train_batcher, val_batcher