How to use the t5.data.utils.rate_num_examples function in t5

To help you get started, we've selected a few t5 examples based on popular ways rate_num_examples is used in public projects. All of the snippets below come from Google's text-to-text-transfer-transformer repository, where the function is used to build task mixtures.

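Briefly, what the function does: rate_num_examples is a rate function that returns a mixing rate proportional to the number of examples in a task's dataset, so larger tasks are sampled more often within a mixture; keyword arguments such as scale (seen below) adjust that rate. A minimal sketch of the two ways it typically appears; demo_proportional, demo_scaled, task_a_v001, and task_b_v001 are placeholder names, not anything from the repository:

import functools

from t5.data import MixtureRegistry
from t5.data.utils import rate_num_examples

# Proportional mixing: each task's rate tracks its number of examples.
MixtureRegistry.add(
    "demo_proportional",
    ["task_a_v001", "task_b_v001"],
    default_rate=rate_num_examples)

# Per-task override: a (name, rate) tuple halves one task's contribution,
# the same pattern the _dedupe helper below produces for RTE.
MixtureRegistry.add(
    "demo_scaled",
    [("task_a_v001", functools.partial(rate_num_examples, scale=0.5)),
     "task_b_v001"],
    default_rate=rate_num_examples)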

Example from google-research/text-to-text-transfer-transformer, t5/data/mixtures.py:
import functools

from t5.data.utils import rate_num_examples

def _dedupe(name):
  if "glue" in name and "rte" in name:
    return functools.partial(rate_num_examples, scale=0.5)
  return rate_num_examples
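The halved rate exists because RTE appears in both the GLUE and SuperGLUE suites, so in a mixture containing both, its examples would otherwise be counted twice. A quick sketch of what the helper returns, using the versioned task-name convention seen elsewhere in this file:

rate_fn = _dedupe("glue_rte_v002")          # partial(rate_num_examples, scale=0.5)
rate_fn = _dedupe("squad_v010_allanswers")  # plain rate_num_examples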
Example from google-research/text-to-text-transfer-transformer, t5/data/mixtures.py:
# Tasks and mixtures to fine-tune on, each treated as
# a single dataset to train on.
_finetune_tasks = [
    "glue_v002_proportional",  # mixture
    "super_glue_v102_proportional",  # mixture
    "cnn_dailymail_v002",
    "squad_v010_allanswers",
    "wmt_t2t_ende_v003",
    "wmt15_enfr_v003",
    "wmt16_enro_v003"
]

# ========================== GLUE and SuperGLUE ================================

MixtureRegistry.add(
    "glue_v002_proportional",
    _glue_tasks, default_rate=rate_num_examples)


MixtureRegistry.add(
    "super_glue_v102_proportional",
    _super_glue_tasks,
    default_rate=rate_num_examples)


# mnli and its associated dev sets: mnli_matched and mnli_mismatched
MixtureRegistry.add(
    "glue_mnli_and_dev_v002",
    [t for t in _glue_tasks if "mnli" in t],
    default_rate=1.0)
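Once registered, these mixtures are retrieved through the standard lookup API. A minimal sketch of pulling a training dataset from the proportional GLUE mixture; the sequence lengths here are arbitrary:

import t5.data

mixture = t5.data.get_mixture_or_task("glue_v002_proportional")
ds = mixture.get_dataset(
    sequence_length={"inputs": 512, "targets": 128},
    split="train")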

# ============================== Co-training ===================================
Example from google-research/text-to-text-transfer-transformer, t5/data/mixtures.py:
# ================== Leave-one-out cotrain then finetune =======================

for task_name in _finetune_tasks:
  task_names = set(_supervised_tasks + ["c4_v020_unsupervised"])

  # Special case to treat all GLUE tasks as one task.
  if task_name == "glue_v002_proportional":
    task_names -= set(_glue_tasks)
    # No de-duping needed
    tasks = [(t, rate_num_examples) for t in task_names]
  # Special case to treat all Super GLUE tasks as one task.
  elif task_name == "super_glue_v102_proportional":
    task_names -= set(_super_glue_tasks)
    # No de-duping needed
    tasks = [(t, rate_num_examples) for t in task_names]
  else:
    task_names -= {task_name}
    # Use de-duping since we have GLUE and SuperGLUE
    tasks = [(t, _dedupe(t)) for t in task_names]

  MixtureRegistry.add("leave_one_out_{}".format(task_name), tasks)

# ================= Pre-train on supervised tasks ==============================

_large_translation_tasks = ["wmt_t2t_ende_v003",
                            "wmt15_enfr_v003"]

_large_supervised_tasks = _large_translation_tasks + ["cnn_dailymail_v002"]

MixtureRegistry.add(
    "large_supervised_equal",
    _large_supervised_tasks,
    # The snippet is truncated at the source; this completion assumes the
    # equal sampling rate implied by the mixture's name.
    default_rate=1.0)