def test_extract_links():
    first_link = compose(tuple, next, iter, extract_links)

    assert_equal(first_link("[[foo|bar]]"), ("foo", "bar"))
    assert_equal(first_link("[[foo]]"), ("foo", "foo"))
    assert_equal(first_link("[[File:picture!]] [[foo]]"), ("foo", "foo"))
    assert_equal(first_link("[[foo]]bar."), ("foo", "foobar"))
    assert_equal(first_link("[[baz|foobar]];"), ("baz", "foobar"))

    # This construct appears in enwiki for chemical formulae etc., but also in
    # nlwiki (and dewiki?) for more general compound nouns. The current
    # handling may not be exactly what we want; any fix should update the test
    # accordingly.
    assert_equal(list(extract_links("[[Lithium|Li]][[Fluorine|F]]")),
                 [("Lithium", "Li"), ("Fluorine", "F")])
    assert_equal(list(extract_links("[[tera-|tera]][[becquerel]]s")),
                 [("tera-", "tera"), ("becquerel", "becquerels")])
def handle_numbers(words):
    # NOTE: the original snippet starts mid-function; the generator signature is
    # inferred from its use in the handle_text pipeline below. Numeric tokens get
    # an extra "*number*" marker in addition to the token itself.
    for word in words:
        if word.isnumeric():
            yield "*number*"
        yield word


def lower_words(words):
    yield from (w.lower() for w in words)


def filter_small_words(words):
    for w in words:
        if len(w) > 2:
            yield w


handle_text = compose(filter_small_words, lower_words, handle_numbers,
                      handle_slash)
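# Pipeline order note: with toolz-style compose the rightmost function runs
# first, so words flow handle_slash -> handle_numbers -> lower_words ->
# filter_small_words. For example, a numeric token such as "42" gains a
# "*number*" marker but is itself dropped at the end (len("42") <= 2),
# assuming handle_slash (defined elsewhere) passes it through unchanged.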
def tokenizer(text):
    """Tokenizes text. Returns a list of tokens (words)."""
    ignore_chars = "()*:\"><][#\n\t'^%?=&"
    for c in ignore_chars:
        text = text.replace(c, ' ')
    words = text.split(' ')
    tokens = list(handle_text(words))
    return tokens
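# Note: because every ignore_chars occurrence becomes a space, text.split(' ')
# also yields empty strings for consecutive separators; these are ultimately
# discarded by filter_small_words (len('') <= 2), so handle_text doubles as the
# cleanup step.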
def label(split):
    start = time()
    print('start processing {} split...'.format(split))
    data_dir = join(DATA_DIR, split)
    dump_dir = join(DUMP_DIR, split)
    n_data = count_data(data_dir)
    for i in range(n_data):
        print('processing {}/{} ({:.2f}%)\r'.format(i, n_data, 100*i/n_data),
              end='')
        with open(join(data_dir, '{}.json'.format(i))) as f:
            data = json.loads(f.read())
        tokenize = compose(list, _split_words)
        art_sents = tokenize(data['article'])
        abs_sents = tokenize(data['abstract'])
        extracted, scores = get_extract_label(art_sents, abs_sents)
        data['extracted'] = extracted
        data['score'] = scores
        with open(join(dump_dir, '{}.json'.format(i)), 'w') as f:
            json.dump(data, f, indent=4)
    print('finished in {}'.format(timedelta(seconds=time()-start)))
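# Usage sketch (directory layout read off the code above): label('train') loads
# DATA_DIR/train/0.json, 1.json, ... and rewrites each record to DUMP_DIR/train/
# with two extra fields, 'extracted' and 'score', taken from get_extract_label
# applied to the tokenized article/abstract pair.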
    async def _normalize_response(self,
                                  response: Tuple[BlockBody, ...]) -> BlockBodyBundles:
        if not isinstance(response, tuple):
            raise MalformedMessage(
                "`GetBlockBodies` response must be a tuple. Got: {0}".format(type(response))
            )
        elif not all(isinstance(item, BlockBody) for item in response):
            raise MalformedMessage("`GetBlockBodies` response must be a tuple of block bodies")

        uncles_hashes = await self._run_in_executor(
            tuple,
            map(compose(keccak, rlp.encode), tuple(body.uncles for body in response)),
        )
        transaction_roots_and_trie_data = await self._run_in_executor(
            tuple,
            map(make_trie_root_and_nodes, tuple(body.transactions for body in response)),
        )

        body_bundles = tuple(zip(response, transaction_roots_and_trie_data, uncles_hashes))
        return body_bundles
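    # Note on the normalizer above: compose(keccak, rlp.encode) RLP-encodes each
    # body's uncle list and then keccak-hashes it; both the hashing and the trie
    # construction are pushed off the event loop via _run_in_executor. Each
    # resulting bundle is (block_body, (transaction_root, trie_nodes), uncles_hash),
    # with make_trie_root_and_nodes presumably returning that (root, nodes) pair.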
def to_eth_v(v_raw, chain_id=None):
    if chain_id is None:
        v = v_raw + V_OFFSET
    else:
        v = v_raw + CHAIN_ID_OFFSET + 2 * chain_id
    return v
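# Worked example, assuming the usual constants V_OFFSET = 27 and
# CHAIN_ID_OFFSET = 35 (EIP-155), for a recovery id v_raw = 1:
#   to_eth_v(1)             == 1 + 27         == 28
#   to_eth_v(1, chain_id=1) == 1 + 35 + 2 * 1 == 38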
def sign_transaction_hash(account, transaction_hash, chain_id):
    signature = account.sign_msg_hash(transaction_hash)
    (v_raw, r, s) = signature.vrs
    v = to_eth_v(v_raw, chain_id)
    return (v, r, s)


to_bytes32 = compose(zpad_bytes(32), to_bytes)


def sign_message_hash(key, msg_hash):
    signature = key.sign_msg_hash(msg_hash)
    (v_raw, r, s) = signature.vrs
    v = to_eth_v(v_raw)
    eth_signature_bytes = to_bytes32(r) + to_bytes32(s) + to_bytes(v)
    return (v, r, s, eth_signature_bytes)
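# Note: with a toolz-style compose, to_bytes32 converts an integer to bytes
# first and zero-pads it to 32 bytes second, so eth_signature_bytes ends up as
# the familiar 65-byte layout r (32 bytes) || s (32 bytes) || v (1 byte).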
class LocalAccount(object):
    '''
    Collection of convenience methods on a private key, roughly using the
    same API as web3.js: https://web3js.readthedocs.io/en/1.0/web3-eth-accounts.html#create
    '''
    def __init__(self, key, account):
def build_batchers(data_dir, net_type, word2id, cuda, debug):
    assert net_type in ['ff', 'rnn']
    prepro = prepro_fn_extract(args.max_word, args.max_sent)

    def sort_key(sample):
        src_sents, _ = sample
        return len(src_sents)

    batchify_fn = (batchify_fn_extract_ff if net_type == 'ff'
                   else batchify_fn_extract_ptr)
    convert_batch = (convert_batch_extract_ff if net_type == 'ff'
                     else convert_batch_extract_ptr)
    batchify = compose(batchify_fn(PAD, cuda=cuda), convert_batch(UNK, word2id))

    train_loader = DataLoader(
        ExtractDataset('train', data_dir), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract)
    val_loader = DataLoader(
        ExtractDataset('val', data_dir), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract)

    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=False)
    val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
                                    single_run=True, fork=False)
    return train_batcher, val_batcher
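# Note on the batchify pipeline above: compose(batchify_fn(PAD, cuda=cuda),
# convert_batch(UNK, word2id)) applies right to left, so a batch is first
# converted from words to ids (unknown words to UNK) and then padded/collated
# with PAD, presumably moving tensors to GPU when cuda is set.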
def hashMessage(data=None, hexstr=None, text=None):
    message_bytes = to_bytes(data, hexstr=hexstr, text=text)
    recovery_hasher = compose(HexBytes, keccak, signature_wrapper)
    return recovery_hasher(message_bytes)
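# Reading the hasher right to left: signature_wrapper (defined in the
# surrounding module, presumably the EIP-191 "\x19Ethereum Signed Message:\n"
# prefixing) is applied first, the result is keccak-hashed, and the digest is
# wrapped in HexBytes.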
def build_batchers(decoder, emb_type, word2id, cuda, debug):
    prepro = prepro_fn_extract(args.max_word, args.max_sent, emb_type)

    def sort_key(sample):
        src_sents, _ = sample
        return len(src_sents)

    batchify_fn = batchify_fn_extract_ptr
    convert_batch = convert_batch_extract_ptr
    batchify = compose(batchify_fn(PAD, cuda=cuda),
                       convert_batch(UNK, word2id, emb_type))

    train_loader = DataLoader(
        ExtractDataset('train'), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract
    )
    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=not debug)

    val_loader = DataLoader(
        ExtractDataset('val'), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn_extract
    )
def build_batchers(word2id, cuda, debug):
    prepro = prepro_fn(args.max_art, args.max_abs)

    def sort_key(sample):
        src, target = sample
        return (len(target), len(src))

    batchify = compose(
        batchify_fn_copy(PAD, START, END, cuda=cuda),
        convert_batch_copy(UNK, word2id)
    )

    train_loader = DataLoader(
        MatchDataset('train'), batch_size=BUCKET_SIZE,
        shuffle=not debug,
        num_workers=4 if cuda and not debug else 0,
        collate_fn=coll_fn
    )
    train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
                                      single_run=False, fork=not debug)

    val_loader = DataLoader(
        MatchDataset('val'), batch_size=BUCKET_SIZE,
        shuffle=False, num_workers=4 if cuda and not debug else 0,