# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- Jasper 10x5 zero-dataset training setup (fragment) ---
# Fixed: the `if`/`else` bodies had lost their indentation (SyntaxError);
# structure restored. This fragment references names defined elsewhere
# (construct_name, lr, batch_size, args, opt_level, ...).
name = construct_name('ZeroDS-Jasper10x5', lr, batch_size, num_gpus, num_epochs,
                      weight_decay)
tb_writer = SummaryWriter(name)

# A non-None local_rank means we were launched by a distributed launcher,
# so place modules on all GPUs; otherwise run on a single GPU.
if args.local_rank is not None:
    device = nemo.core.DeviceType.AllGpu
    print('Doing ALL GPU')
else:
    device = nemo.core.DeviceType.GPU

# instantiate Neural Factory with supported backend
neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=opt_level,
    placement=device)

# Model architecture and labels come from the TOML definition file.
jasper_model_definition = toml.load("../../examples/nemo_asr/jasper10x5.toml")
jasper_model_definition['placement'] = device
labels = jasper_model_definition['labels']['labels']
# train_manifest = "/mnt/D1/Data/librispeech/librivox-train-all.json"
train_manifest = args.train_manifest

# Featurizer settings feed the audio-to-mel preprocessor module.
featurizer_config = jasper_model_definition['input']
data_preprocessor = neural_factory.get_module(
    name="AudioToMelSpectrogramPreprocessor",
    collection="nemo_asr",
    params=featurizer_config)
# --- NMT evaluation setup (fragment) ---
# CLI flags: attention dropouts, eval cadence, data location, language
# pair, and beam-search decoding knobs. Registered table-driven to keep
# the simple (flag, default, type) options in one place.
_simple_options = (
    ("--attn_score_dropout", 0.25, float),
    ("--attn_layer_dropout", 0.25, float),
    ("--eval_step_frequency", 2000, int),
    ("--data_dir", "/dataset/", str),
    ("--src_lang", "pred", str),
    ("--tgt_lang", "real", str),
    ("--beam_size", 4, int),
    ("--len_pen", 0.0, float),
)
for _flag, _default, _type in _simple_options:
    parser.add_argument(_flag, default=_default, type=_type)
parser.add_argument(
    "--restore_from",
    dest="restore_from",
    type=str,
    default="../../scripts/bert-base-uncased_decoder.pt")
args = parser.parse_args()

# Neural factory: log dir from --work_dir, TensorBoard writer enabled,
# and this script copied into the log dir for reproducibility.
nf = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=args.amp_opt_level,
    log_dir=args.work_dir,
    create_tb_writer=True,
    files_to_copy=[__file__],
    add_time_to_log_dir=False)

# define the parameters for the first sub layer in Transformer block
dec_first_sublayer_params = {
    "first_sub_layer": "self_attention",
    "attn_score_dropout": args.attn_score_dropout,
    "attn_layer_dropout": args.attn_layer_dropout,
}

tokenizer = NemoBertTokenizer(pretrained_model=args.pretrained_model)
# Round the vocabulary size up to a multiple of 8 (hardware-friendly).
vocab_size = 8 * math.ceil(tokenizer.vocab_size / 8)
def main():
    """WaveGlow training entry point (fragment).

    Parses CLI args, builds a NeuralModuleFactory, and loads the model
    config YAML. Fixed: the function body had lost its indentation
    (SyntaxError); structure restored.
    """
    args, name = parse_args()

    # Nest this run's log directory under the user-supplied work dir.
    log_dir = None
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=args.checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir)

    # local_rank is set by the distributed launcher -> multi-GPU run.
    if args.local_rank is not None:
        neural_factory.logger.info('Doing ALL GPU')

    # "safe" loader avoids executing arbitrary YAML tags.
    yaml = YAML(typ="safe")
    with open(args.model_config) as file:
        waveglow_params = yaml.load(file)
    # instantiate neural modules
    # NOTE(review): the original function body is truncated here in this
    # source fragment; the module instantiation that follows is missing.
def main():
    """Jasper training entry point (fragment).

    Parses args/config, builds a NeuralModuleFactory, and logs the run
    setup. Fixed: the function body had lost its indentation
    (SyntaxError); structure restored.
    """
    # Parse args
    args = parse_args()
    cfg = parse_cfg(args)
    name = construct_name(args, cfg)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=name,
        checkpoint_dir=args.checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir)

    logger = neural_factory.logger
    tb_writer = neural_factory.tb_writer
    # Propagate the factory's checkpoint dir back onto args so later code
    # sees the path the factory actually chose.
    args.checkpoint_dir = neural_factory.checkpoint_dir

    logger.info(f'Name:\n{name}')
    logger.info(f'Args to be passed to job #{args.local_rank}:')
    logger.info(pformat(vars(args)))
    # NOTE(review): the original function body is truncated here in this
    # source fragment.
# --- Multi-GPU Jasper evaluation setup (fragment) ---
# Fixed: nested `if`/`raise`/`with` bodies had lost their indentation
# (SyntaxError); structure restored.
args = parser.parse_args()
batch_size = args.batch_size
load_dir = args.load_dir

if args.local_rank is not None:
    # LM-fused beam search cannot be sharded across GPUs.
    if args.lm_path:
        raise NotImplementedError(
            "Beam search decoder with LM does not currently support "
            "evaluation on multi-gpu.")
    device = nemo.core.DeviceType.AllGpu
else:
    device = nemo.core.DeviceType.GPU

# Instantiate Neural Factory with supported backend
neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=nemo.core.Optimization.mxprO1,
    placement=device)
logger = neural_factory.logger

if args.local_rank is not None:
    logger.info('Doing ALL GPU')

# "safe" loader avoids executing arbitrary YAML tags.
yaml = YAML(typ="safe")
with open(args.model_config) as f:
    jasper_params = yaml.load(f)
vocab = jasper_params['labels']
sample_rate = jasper_params['sample_rate']

eval_datasets = args.eval_datasets
tb_writer = None
print("Tensorboard is not available.")
# Device placement: distributed launch (local_rank set) uses all GPUs,
# otherwise a single GPU. Fixed: `if`/`else` bodies had lost their
# indentation (SyntaxError); structure restored.
if args.local_rank is not None:
    device = nemo.core.DeviceType.AllGpu
else:
    device = nemo.core.DeviceType.GPU

# Map the mixed-precision flag to an AMP optimization level
# (mxprO1 = mixed precision, mxprO0 = full precision).
# Fixed: `is True` identity test replaced with plain truthiness (PEP 8).
if args.mixed_precision:
    optimization_level = nemo.core.Optimization.mxprO1
else:
    optimization_level = nemo.core.Optimization.mxprO0

# Instantiate Neural Factory with supported backend
neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=optimization_level,
    placement=device)
# Build tokenizer + BERT encoder. Without a checkpoint, use pretrained
# HuggingFace weights; with one, use a local SentencePiece tokenizer and a
# BERT built from an explicit config (weights presumably restored from the
# checkpoint elsewhere -- TODO confirm against the full script).
# NOTE(review): indentation of this branch was lost in this fragment, and
# the final BERT(...) call is truncated mid-arguments (no closing paren).
if args.bert_checkpoint is None:
tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
bert_model = nemo_nlp.huggingface.BERT(
pretrained_model_name=args.pretrained_bert_model,
factory=neural_factory)
else:
tokenizer = SentencePieceTokenizer(model_path="tokenizer.model")
# Ensure BERT's special tokens exist in the SentencePiece vocabulary.
tokenizer.add_special_tokens(["[MASK]", "[CLS]", "[SEP]"])
bert_model = nemo_nlp.huggingface.BERT(
config_filename=args.bert_config,
# --- Transfer-learning (resnet18) setup fragment ---
# Fixed: `if`/`else` bodies had lost their indentation (SyntaxError);
# structure restored.
weight_decay = args.weight_decay
momentum = args.momentum
num_gpus = args.num_gpus

# Fall back to a default TensorBoard folder when none was supplied.
if args.tb_folder is None:
    tb_folder = 'transfer_learning'
else:
    tb_folder = args.tb_folder
tb_writer = SummaryWriter(tb_folder)

device = nemo.core.DeviceType.GPU

# Instantiate Neural Factory and Neural Modules
neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    placement=device)

# NOTICE: pretrain=True argument
resnet = neural_factory.get_module(name="resnet18",
                                   params={"num_classes": 2},
                                   collection="torchvision",
                                   pretrained=True)

dl_train = neural_factory.get_module(
    name="ImageFolderDataLayer", collection="torchvision",
    params={"batch_size": batch_size,
            # derive the expected image size from the model's input port
            "input_size": resnet.input_ports["x"].axis2type[2].dim,
            "shuffle": True,
            "path": "hymenoptera_data/train",
            })
# --- Offline Jasper inference setup (fragment) ---
# Fixed: the `with` body had lost its indentation (SyntaxError);
# structure restored.
ENABLE_NGRAM = False
# This is only necessary if ENABLE_NGRAM = True. Otherwise, set to empty string
LM_PATH = ""

# Read model YAML ("safe" loader avoids executing arbitrary tags)
yaml = YAML(typ="safe")
with open(MODEL_YAML) as f:
    jasper_model_definition = yaml.load(f)
labels = jasper_model_definition['labels']

# Instantiate necessary Neural Modules
# Note that data layer is missing from here
neural_factory = nemo.core.NeuralModuleFactory(
    placement=nemo.core.DeviceType.GPU,
    backend=nemo.core.Backend.PyTorch)
data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
    factory=neural_factory)

# Encoder architecture comes from the YAML definition; weights are
# restored from a pre-trained checkpoint.
jasper_encoder = nemo_asr.JasperEncoder(
    jasper=jasper_model_definition['JasperEncoder']['jasper'],
    activation=jasper_model_definition['JasperEncoder']['activation'],
    feat_in=jasper_model_definition[
        'AudioToMelSpectrogramPreprocessor']['features'])
jasper_encoder.restore_from(CHECKPOINT_ENCODER, local_rank=0)

jasper_decoder = nemo_asr.JasperDecoderForCTC(
    feat_in=1024,
    num_classes=len(labels))
jasper_decoder.restore_from(CHECKPOINT_DECODER, local_rank=0)

greedy_decoder = nemo_asr.GreedyCTCDecoder()
if ENABLE_NGRAM and os.path.isfile(LM_PATH):
beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(