Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- Data preparation -------------------------------------------------------
# Reads the train/test stories from the archive at `path`, builds the
# vocabulary and the word -> index table, and vectorizes both splits.
print('path:', path)

# `challenge` is a format template; it is filled with 'train' / 'test' to get
# the archive member name for each split.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Both splits are scanned several times below; concatenate them once.
all_stories = train + test

# Each item is a (story, question, answer) triple: story and question are
# token sequences, the answer is a single token.
vocab = set()
for story, q, a in all_stories:
    vocab.update(story, q, [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word and the
# vocabulary size is one larger than the number of distinct tokens.
# NOTE(review): index 0 is presumably the padding value used by
# vectorize_stories -- confirm against that helper.
vocab_size = len(vocab) + 1
word_idx = {word: index for index, word in enumerate(vocab, start=1)}

# Longest story / question over both splits, passed to vectorize_stories.
story_maxlen = max(len(story) for story, _, _ in all_stories)
query_maxlen = max(len(query) for _, query, _ in all_stories)

idx_story, idx_query, idx_answer = vectorize_stories(
    train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(
    test, word_idx, story_maxlen, query_maxlen)

print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
print('idx_answer.shape:', idx_answer.shape)
print('story max len:', story_maxlen)
print('query max len:', query_maxlen)
def train():
print('build model...')
sentence = keras.layers.Input(shape=(story_maxlen,), dtype='int32')
encoded_sentence = keras.layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
encoded_sentence = keras.layers.Dropout(0.3)(encoded_sentence)
# --- Data preparation -------------------------------------------------------
# Reads the train/test stories from the archive at `path`, builds the
# vocabulary and the word -> index table, and vectorizes both splits.

# `challenge` is a format template; it is filled with 'train' / 'test' to get
# the archive member name for each split.
with tarfile.open(path) as tar:
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

# Both splits are scanned several times below; concatenate them once.
all_stories = train + test

# Each item is a (story, question, answer) triple: story and question are
# token sequences, the answer is a single token.
vocab = set()
for story, q, a in all_stories:
    vocab.update(story, q, [a])
vocab = sorted(vocab)

# Word indices start at 1, so index 0 is never assigned to a word and the
# vocabulary size is one larger than the number of distinct tokens.
# NOTE(review): index 0 is presumably the padding value used by
# vectorize_stories -- confirm against that helper.
vocab_size = len(vocab) + 1
word_idx = {word: index for index, word in enumerate(vocab, start=1)}

# Longest story / question over both splits, passed to vectorize_stories.
story_maxlen = max(len(story) for story, _, _ in all_stories)
query_maxlen = max(len(query) for _, query, _ in all_stories)

idx_story, idx_query, idx_answer = vectorize_stories(
    train, word_idx, story_maxlen, query_maxlen)
test_idx_story, test_idx_query, test_idx_answer = vectorize_stories(
    test, word_idx, story_maxlen, query_maxlen)

print('vocab:', vocab)
print('idx_story.shape:', idx_story.shape)
print('idx_query.shape:', idx_query.shape)
print('idx_answer.shape:', idx_answer.shape)
print('story max len:', story_maxlen)
print('query max len:', query_maxlen)
def train():
print('build model...')
sentence = keras.layers.Input(shape=(story_maxlen,), dtype='int32')
encoded_sentence = keras.layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
encoded_sentence = keras.layers.Dropout(0.3)(encoded_sentence)
question = keras.layers.Input(shape=(query_maxlen,), dtype='int32')