How to use the babi.util.vectorize_stories function in babi

To help you get started, we’ve selected a few babi examples that show popular ways babi.util.vectorize_stories is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source (GitHub): shibing624 / python-tutorial / 07keras / babi / babi_rnn.py (View on GitHub, external link)
print('path:', path)

# Parse the train and test splits straight out of the archive; `challenge`
# is a member-path template that is formatted with the split name.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Gather every distinct token seen in a story, a question or an answer
# across both splits, then freeze the result into a sorted list.
vocab = set()
for story, q, a in train + test:
    vocab.update(story + q + [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word
# (presumably kept free for padding -- confirm against vectorize_stories);
# that is why vocab_size has one slot more than the vocabulary itself.
word_idx = {token: index for index, token in enumerate(vocab, start=1)}
vocab_size = len(vocab) + 1

# Longest story / longest question over train and test combined; both
# lengths are handed to vectorize_stories below.
story_maxlen = max(len(tokens) for tokens, _, _ in train + test)
query_maxlen = max(len(tokens) for _, tokens, _ in train + test)

# Vectorize both splits with the same word index and the same max lengths.
idx_story, idx_query, idx_answer = vectorize_stories(
    train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(
    test, word_idx, story_maxlen, query_maxlen)

# Quick sanity dump of the vocabulary and the training-set shapes.
print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
print('idx_answer.shape:', idx_answer.shape)
print('story max len:', story_maxlen)
print('query max len:', query_maxlen)


def train():
    print('build model...')

    sentence = keras.layers.Input(shape=(story_maxlen,), dtype='int32')
    encoded_sentence = keras.layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
    encoded_sentence = keras.layers.Dropout(0.3)(encoded_sentence)
Source (GitHub): shibing624 / python-tutorial / 07keras / babi / babi_rnn.py (View on GitHub, external link)
# Parse the train and test splits straight out of the archive; `challenge`
# is a member-path template that is formatted with the split name.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Gather every distinct token seen in a story, a question or an answer
# across both splits, then freeze the result into a sorted list.
vocab = set()
for story, q, a in train + test:
    vocab.update(story + q + [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word
# (presumably kept free for padding -- confirm against vectorize_stories);
# that is why vocab_size has one slot more than the vocabulary itself.
word_idx = {token: index for index, token in enumerate(vocab, start=1)}
vocab_size = len(vocab) + 1

# Longest story / longest question over train and test combined; both
# lengths are handed to vectorize_stories below.
story_maxlen = max(len(tokens) for tokens, _, _ in train + test)
query_maxlen = max(len(tokens) for _, tokens, _ in train + test)

# Vectorize both splits with the same word index and the same max lengths.
idx_story, idx_query, idx_answer = vectorize_stories(
    train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(
    test, word_idx, story_maxlen, query_maxlen)

# Quick sanity dump of the vocabulary and the training-set shapes.
print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
print('idx_answer.shape:', idx_answer.shape)
print('story max len:', story_maxlen)
print('query max len:', query_maxlen)


def train():
    print('build model...')

    sentence = keras.layers.Input(shape=(story_maxlen,), dtype='int32')
    encoded_sentence = keras.layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
    encoded_sentence = keras.layers.Dropout(0.3)(encoded_sentence)

    question = keras.layers.Input(shape=(query_maxlen,), dtype='int32')