How to use t5 - common examples

To help you get started, we’ve selected a few t5 examples based on popular ways it is used in public projects.


From google-research/text-to-text-transfer-transformer, t5/data/test_utils.py:
# Builds a vocabulary from the SentencePiece model bundled with the test data;
# `extra_ids` reserves that many sentinel tokens at the end of the vocabulary.
def sentencepiece_vocab(extra_ids=0):
  return sentencepiece_vocabulary.SentencePieceVocabulary(
      os.path.join(TEST_DATA_DIR, "sentencepiece", "sentencepiece.model"),
      extra_ids=extra_ids)
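
A quick round trip through the returned vocabulary might look like this (a sketch; it assumes `SentencePieceVocabulary` exposes the `encode`/`decode` methods that t5's vocabulary classes provide):

vocab = sentencepiece_vocab(extra_ids=100)
ids = vocab.encode("Hello world")  # list of int token ids
print(vocab.decode(ids))           # recovers the text, modulo normalization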
From google-research/text-to-text-transfer-transformer, t5/data/test_utils.py:
import collections
import os
import shutil

from absl import logging
from absl.testing import absltest
import numpy as np
import six
from t5.data import sentencepiece_vocabulary
from t5.data import utils as dataset_utils
import tensorflow.compat.v1 as tf
import tensorflow_datasets as tfds

TaskRegistry = dataset_utils.TaskRegistry
MixtureRegistry = dataset_utils.MixtureRegistry

mock = absltest.mock

TEST_DATA_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "test_data")


# _ProxyTest is required because py2 does not allow instantiating
# absltest.TestCase directly.
class _ProxyTest(absltest.TestCase):
  """Instance of TestCase to reuse methods for testing."""
  maxDiff = None

  def runTest(self):
    pass
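
The proxy exists so helper code outside any test case can still call absltest assertion methods. A minimal sketch of that use (the variable name here is hypothetical):

_proxy = _ProxyTest()
_proxy.assertEqual(1 + 1, 2)  # borrow absltest assertions outside a TestCase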
From google-research/text-to-text-transfer-transformer, t5/data/test_utils.py:
_FAKE_CACHED_DATASET["train"], [2, 1], _dump_examples_to_tfrecord)
    _dump_fake_dataset(
        os.path.join(cached_task_dir, "validation.tfrecord"),
        _FAKE_CACHED_DATASET["validation"], [2], _dump_examples_to_tfrecord)

    # Prepare uncached TfdsTask.
    add_tfds_task("uncached_task")
    self.uncached_task = TaskRegistry.get("uncached_task")

    # Prepare uncached TextLineTask.
    _dump_fake_dataset(
        os.path.join(self.test_data_dir, "train.tsv"),
        _FAKE_DATASET["train"], [2, 1], _dump_examples_to_tsv)
    TaskRegistry.add(
        "text_line_task",
        dataset_utils.TextLineTask,
        split_to_filepattern={
            "train": os.path.join(self.test_data_dir, "train.tsv*"),
        },
        skip_header_lines=1,
        text_preprocessor=[_split_tsv_preprocessor, test_text_preprocessor],
        sentencepiece_model_path=os.path.join(
            TEST_DATA_DIR, "sentencepiece", "sentencepiece.model"),
        metric_fns=[])
    self.text_line_task = TaskRegistry.get("text_line_task")

    # Auto-verify any split by just returning the split name
    dataset_utils.verify_tfds_split = absltest.mock.Mock(
        side_effect=lambda x, y: y
    )
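
With the task registered, a test can pull it back out of the registry and ask for a tokenized dataset. A sketch, assuming eager execution and illustrative sequence lengths (`get_dataset` is the standard t5.data Task entry point):

task = TaskRegistry.get("text_line_task")
ds = task.get_dataset(
    sequence_length={"inputs": 64, "targets": 64}, split="train")
for ex in ds.take(1):
  print(ex)  # feature dict with tokenized "inputs" and "targets"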
From google-research/text-to-text-transfer-transformer, t5/data/test_utils.py:
def add_tfds_task(
    name,
    tfds_name="fake:0.0.0",
    text_preprocessor=test_text_preprocessor,
    token_preprocessor=None,
    splits=None):
  TaskRegistry.add(
      name,
      dataset_utils.TfdsTask,
      tfds_name=tfds_name,
      text_preprocessor=text_preprocessor,
      token_preprocessor=token_preprocessor,
      sentencepiece_model_path=os.path.join(TEST_DATA_DIR, "sentencepiece",
                                            "sentencepiece.model"),
      metric_fns=[],
      splits=splits)
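
Illustrative usage (the task name is made up; with the default `tfds_name="fake:0.0.0"`, a fake TFDS dataset must already be injected, e.g. via `add_fake_tfds` in the next snippet):

add_tfds_task("my_fake_task", splits=["train", "validation"])
task = TaskRegistry.get("my_fake_task")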
From google-research/text-to-text-transfer-transformer, t5/data/test_utils.py:
def add_fake_tfds(fake_tfds):
  # Inject a fake dataset into LazyTfdsLoader's memoization cache so that
  # lookups for (fake_tfds.name, data_dir=None) return it instead of loading
  # real TFDS data.
  dataset_utils.LazyTfdsLoader._MEMOIZED_INSTANCES[  # pylint:disable=protected-access
      (fake_tfds.name, None)] = fake_tfds
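
A sketch of pairing this with a minimal stand-in (the mock object here is hypothetical; the real tests would supply an object with whatever attributes their LazyTfdsLoader usage touches):

fake = mock.Mock()
fake.name = "fake:0.0.0"  # must match the tfds_name used by the task
add_fake_tfds(fake)       # later loads of "fake:0.0.0" hit the mock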
From google-research/text-to-text-transfer-transformer, t5/evaluation/eval_utils.py:
import collections


class Metric(object):
  """A display name plus an optional chart group for a TensorBoard tag."""

  def __init__(self, name, group=None):
    self.name = name
    self.group = group or name

# This OrderedDict maps TensorBoard tags to nice-looking metric names.
# The order of the keys in the dict determines the order they get logged.
METRIC_NAMES = collections.OrderedDict([
    ("glue_average", Metric("Average GLUE Score")),
    ("glue_cola_v002/matthews_corrcoef", Metric("CoLA")),
    ("glue_sst2_v002/accuracy", Metric("SST-2")),
    ("glue_mrpc_v002/f1", Metric("MRPC (F1)", "MRPC")),
    ("glue_mrpc_v002/accuracy", Metric("MRPC (accuracy)", "MRPC")),
    ("glue_stsb_v002/pearson_corrcoef", Metric("STSB (Pearson)", "STSB")),
    ("glue_stsb_v002/spearman_corrcoef", Metric("STSB (Spearman)", "STSB")),
    ("glue_qqp_v002/f1", Metric("QQP (F1)", "QQP")),
    ("glue_qqp_v002/accuracy", Metric("QQP (accuracy)", "QQP")),
    ("glue_mnli_matched_v002/accuracy", Metric("MNLIm", "MNLI")),
    ("glue_mnli_mismatched_v002/accuracy", Metric("MNLImm", "MNLI")),
    ("glue_qnli_v002/accuracy", Metric("QNLI")),
    ("glue_rte_v002/accuracy", Metric("GLUE RTE")),
    ("cnn_dailymail_v002/rouge1", Metric("CNN/DM (ROUGE-1)", "CNN/DM")),
    ("cnn_dailymail_v002/rouge2", Metric("CNN/DM (ROUGE-2)", "CNN/DM")),
    ("cnn_dailymail_v002/rougeL", Metric("CNN/DM (ROUGE-L)", "CNN/DM")),
    ("cnn_dailymail_v002/rougeLsum", Metric("CNN/DM (ROUGE-L)", "CNN/DM")),
    ("squad_v010_allanswers/em", Metric("SQuAD (EM)", "SQuAD")),
    ("squad_v010_allanswers/f1", Metric("SQuAD (F1)", "SQuAD")),
    ("squad_v010_allanswers_span/em", Metric("SQuAD (EM)", "SQuAD")),
    ("squad_v010_allanswers_span/f1", Metric("SQuAD (F1)", "SQuAD")),
    ("squad_v010/em", Metric("SQuAD (EM)", "SQuAD")),
    ("squad_v010/f1", Metric("SQuAD (F1)", "SQuAD")),
    ("super_glue_average", Metric("Average SuperGLUE Score")),
    ("super_glue_boolq_v102/accuracy", Metric("BoolQ (accuracy)")),
    ("super_glue_cb_v102/mean_3class_f1", Metric("CB (F1)", "CB")),
    ("super_glue_cb_v102/accuracy", Metric("CB (accuracy)", "CB")),
    ("super_glue_copa_v102/accuracy", Metric("CoPA")),
    ("super_glue_multirc_v102/f1", Metric("MultiRC (F1)", "MultiRC")),
    ("super_glue_multirc_v102/exact_match", Metric("MultiRC (EM)", "MultiRC")),
    ("super_glue_record_v102/f1", Metric("ReCoRD (F1)", "ReCoRD")),
    ("super_glue_record_v102/em", Metric("ReCoRD (EM)", "ReCoRD")),
    ("super_glue_rte_v102/accuracy", Metric("SuperGLUE RTE")),
    ("super_glue_wic_v102/accuracy", Metric("WiC")),
    ("super_glue_wsc_v102_simple_eval/accuracy", Metric("WSC")),
    ("dpr_v001_simple/accuracy", Metric("DPR")),
    ("wmt_t2t_ende_v003/bleu", Metric("WMT T2T En-De")),
    ("wmt14_ende_v003/bleu", Metric("WMT14 En-De")),
    ("wmt15_enfr_v003/bleu", Metric("WMT15 En-Fr")),
    ("wmt16_enro_v003/bleu", Metric("WMT16 En-Ro")),
])

Event = collections.namedtuple("event", ["step", "value"])


def parse_events_files(tb_summary_dir):
  """Parse all TensorBoard events files in tb_summary_dir.

  Args:
    tb_summary_dir: str, path to look for events files in.