How to use the babi.util.get_stories function in babi

To help you get started, we’ve selected a babi example based on popular ways the library is used in public projects.


Example from shibing624/python-tutorial (07keras/babi/babi_rnn.py, view on GitHub):
# Load the bAbI QA2 task from the bundled tarball, parse the train/test splits
# with get_stories, build a vocabulary, and vectorize the stories for an RNN model.
import os
import tarfile

import keras
# NOTE: imports added for completeness; vectorize_stories is assumed to live
# in babi.util alongside get_stories.
from babi.util import get_stories, vectorize_stories

RNN = keras.layers.recurrent.LSTM
EMBED_HIDDEN_SIZE = 50
SENT_HIDDEN_SIZE = 100
QUERY_HIDDEN_SIZE = 100
BATCH_SIZE = 32
EPOCH = 2
save_model_path = 'babi_rnn_model.h5'
print("RNN,Embed,Sent,Query={},{},{},{}".format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))

challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'
pwd_path = os.path.abspath(os.path.dirname(__file__))
print('pwd_path:', pwd_path)
path = os.path.join(pwd_path, '../../data/babi_tasks_1-20_v1-2.tar.gz')
print('path:', path)
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Each parsed sample is a (story, question, answer) triple of word tokens.
vocab = set()
for story, q, a in train + test:
    vocab |= set(story + q + [a])
vocab = sorted(vocab)

# Reserve index 0 for padding.
vocab_size = len(vocab) + 1
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
story_maxlen = max(map(len, (x for x, _, _ in train + test)))
query_maxlen = max(map(len, (x for _, x, _ in train + test)))

idx_story, idx_query, idx_answer = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
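
Before vectorizing, it can help to inspect a single parsed sample. The short sketch below is illustrative only: it assumes get_stories is importable from babi.util (as above) and that the bAbI tarball sits in the current directory. Each sample returned by get_stories is a (story, question, answer) triple of word tokens, which is exactly what the vocabulary loop above iterates over.

import tarfile

from babi.util import get_stories

challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'
with tarfile.open('babi_tasks_1-20_v1-2.tar.gz') as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))

# Inspect the first (story, question, answer) triple.
story, question, answer = train[0]
print('story:   ', ' '.join(story))
print('question:', ' '.join(question))
print('answer:  ', answer)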