def predict(args):
    # Load PaddleHub pretrained model
    module = hub.Module(name=args.module)
    input_dict, output_dict, program = module.context(trainable=True)

    # Download dataset
    if args.dataset.lower() == "flowers":
        dataset = hub.dataset.Flowers()
    elif args.dataset.lower() == "dogcat":
        dataset = hub.dataset.DogCat()
    elif args.dataset.lower() == "indoor67":
        dataset = hub.dataset.Indoor67()
    elif args.dataset.lower() == "food101":
        dataset = hub.dataset.Food101()
    elif args.dataset.lower() == "stanforddogs":
        dataset = hub.dataset.StanfordDogs()
    else:
        raise ValueError("%s dataset is not defined" % args.dataset)

    # Use ImageClassificationReader to read the dataset
    data_reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)

    feature_map = output_dict["feature_map"]

    # Set up the feed list for the data feeder
    feed_list = [input_dict["image"].name]
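    # Possible continuation (sketch): wrap the reader, feature map and feed list in
    # PaddleHub's ImageClassifierTask and run prediction. The RunConfig values and the
    # image path are illustrative assumptions, and args is assumed to carry use_gpu,
    # batch_size and checkpoint_dir from the CLI parser shown elsewhere in these excerpts.
    config = hub.RunConfig(
        use_data_parallel=False,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.DefaultFinetuneStrategy())
    task = hub.ImageClassifierTask(
        data_reader=data_reader,
        feed_list=feed_list,
        feature=feature_map,
        num_classes=dataset.num_labels,
        config=config)
    data = ["path/to/image.jpg"]  # hypothetical input image
    results = task.predict(data=data)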
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory of the model checkpoint.")
parser.add_argument("--batch_size", type=int, default=1, help="Number of examples per batch for training.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words in the longest sequence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether to use GPU for fine-tuning; input should be True or False.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
    # Load PaddleHub ERNIE 2.0 pretrained model
    module = hub.Module(name="ernie_v2_eng_base")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download dataset and use MultiLabelClassifyReader to read it
    dataset = hub.dataset.Toxic()
    reader = hub.reader.MultiLabelClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len)

    # Set up the feed list for the data feeder
    feed_list = [
        inputs["input_ids"].name,
        inputs["position_ids"].name,
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]

    # Construct the transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
# Tail of an LSTM text-classification net: run an LSTM over fc0, max-pool the
# hidden states, apply tanh, and project to hid_dim2.
lstm_h, c = fluid.layers.dynamic_lstm(
    input=fc0, size=hid_dim * 4, is_reverse=False)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = fluid.layers.tanh(lstm_max)
fc = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
return fc
if __name__ == '__main__':
    # Step1: load PaddleHub ELMo pretrained model
    module = hub.Module(name="elmo")
    inputs, outputs, program = module.context(trainable=True)

    # Step2: download dataset and use LACClassifyReader to read it
    dataset = hub.dataset.ChnSentiCorp()
    reader = hub.reader.LACClassifyReader(
        dataset=dataset, vocab_path=module.get_vocab_path())
    word_dict_len = len(reader.vocab)

    word_ids = inputs["word_ids"]
    elmo_embedding = outputs["elmo_embed"]

    # Step3: switch the program and build the network
    # Choose the net you would like: bow, cnn, gru, bilstm, lstm
    switch_main_program(program)

    # Embedding layer
    word_embed_dims = 128
    word_embedding = fluid.layers.embedding(
        input=word_ids,
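        # Assumed continuation of the truncated embedding call; the size follows from
        # word_dict_len and word_embed_dims defined above, is_sparse is illustrative.
        size=[word_dict_len, word_embed_dims],
        is_sparse=True)

    # Sketch (assumption): concatenate the ELMo embedding with the word embedding and
    # feed the result into one of the nets listed above (e.g. the LSTM tail shown earlier).
    input_feature = fluid.layers.concat(
        input=[elmo_embedding, word_embedding], axis=1)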
def finetune(args):
    # Load PaddleHub pretrained model
    module = hub.Module(name=args.module)
    input_dict, output_dict, program = module.context(trainable=True)

    # Download dataset
    if args.dataset.lower() == "flowers":
        dataset = hub.dataset.Flowers()
    elif args.dataset.lower() == "dogcat":
        dataset = hub.dataset.DogCat()
    elif args.dataset.lower() == "indoor67":
        dataset = hub.dataset.Indoor67()
    elif args.dataset.lower() == "food101":
        dataset = hub.dataset.Food101()
    elif args.dataset.lower() == "stanforddogs":
        dataset = hub.dataset.StanfordDogs()
    else:
        raise ValueError("%s dataset is not defined" % args.dataset)

    # Use ImageClassificationReader to read the dataset
    data_reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)

    feature_map = output_dict["feature_map"]

    # Set up the feed list for the data feeder
    feed_list = [input_dict["image"].name]
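    # Sketch (assumption): for fine-tuning, the same reader, feature map and feed list
    # are wrapped in an ImageClassifierTask and trained with finetune_and_eval().
    # The RunConfig values here are illustrative; args is assumed to carry the CLI options.
    config = hub.RunConfig(
        use_cuda=args.use_gpu,
        num_epoch=3,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.DefaultFinetuneStrategy())
    task = hub.ImageClassifierTask(
        data_reader=data_reader,
        feed_list=feed_list,
        feature=feature_map,
        num_classes=dataset.num_labels,
        config=config)
    task.finetune_and_eval()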
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory of the model checkpoint.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether to use GPU for fine-tuning; input should be True or False.")
parser.add_argument("--batch_size", type=int, default=1, help="Number of examples per batch at prediction time.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
    # Load PaddleHub senta pretrained model
    module = hub.Module(name="senta_bilstm")
    inputs, outputs, program = module.context(trainable=True)

    # Download dataset and use LACClassifyReader to read it
    dataset = hub.dataset.ChnSentiCorp()
    reader = hub.reader.LACClassifyReader(
        dataset=dataset, vocab_path=module.get_vocab_path())

    sent_feature = outputs["sentence_feature"]

    # Set up the feed list for the data feeder
    # Must feed all the tensors the senta module needs
    feed_list = [inputs["words"].name]

    # Set up the running config for the PaddleHub Finetune API
    config = hub.RunConfig(
        use_data_parallel=False,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.AdamWeightDecayStrategy())
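    # Sketch (assumption): wrap the senta sentence feature in a TextClassifierTask and
    # fine-tune it; this mirrors PaddleHub's standard text-classification pattern.
    cls_task = hub.TextClassifierTask(
        data_reader=reader,
        feature=sent_feature,
        feed_list=feed_list,
        num_classes=dataset.num_labels,
        config=config)
    cls_task.finetune_and_eval()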
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
    # Load PaddleHub ERNIE Tiny pretrained model
    module = hub.Module(name="ernie_tiny")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download dataset and use accuracy as the metric
    # Choose a dataset: GLUE/XNLI/ChineseGLUE/NLPCC-DBQA/LCQMC
    dataset = hub.dataset.ChnSentiCorp()

    # ernie_tiny uses sub-words to tokenize Chinese sentences
    # For modules other than ernie_tiny, sp_model_path and word_dict_path should be set to None
    reader = hub.reader.ClassifyReader(
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct the transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]

    # Set up the feed list for the data feeder
    # Must feed all the tensors the module needs
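    # Assumed continuation (sketch): the standard ERNIE-style feed list (same pattern as
    # the other excerpts), a TextClassifierTask, and a prediction call. The config values
    # and sample input are illustrative assumptions.
    feed_list = [
        inputs["input_ids"].name,
        inputs["position_ids"].name,
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]
    config = hub.RunConfig(
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        strategy=hub.AdamWeightDecayStrategy())
    cls_task = hub.TextClassifierTask(
        data_reader=reader,
        feature=pooled_output,
        feed_list=feed_list,
        num_classes=dataset.num_labels,
        config=config)
    data = [["这个产品很好用"]]  # hypothetical text to classify
    results = cls_task.predict(data=data)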
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
    # Load PaddleHub ERNIE pretrained model
    module = hub.Module(name="ernie")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download dataset and use ClassifyReader to read it
    dataset = hub.dataset.NLPCC_DBQA()
    reader = hub.reader.ClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len)

    # Construct the transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]

    # Set up the feed list for the data feeder
    # Must feed all the tensors the ERNIE module needs
    feed_list = [
        inputs["input_ids"].name,
        inputs["position_ids"].name,
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]
import paddle.fluid as fluid
import paddlehub as hub

# Step1: load PaddleHub ERNIE pretrained model
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Step2: download the dataset and set up a reader for it
dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(
    dataset=dataset, vocab_path=module.get_vocab_path(), max_seq_len=128)

# Step3: build the text classification task on top of pooled_output
with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')
    pooled_output = outputs["pooled_output"]
    cls_task = hub.create_text_classification_task(
        feature=pooled_output, label=label, num_classes=dataset.num_labels)

# Step4: configure the fine-tuning strategy
strategy = hub.AdamWeightDecayStrategy(
    learning_rate=5e-5,
    warmup_proportion=0.1,
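    # Assumed continuation of the truncated call; the weight_decay value is illustrative
    weight_decay=0.01)

# Step5 (sketch, assumed): set up a RunConfig and launch fine-tuning. The exact launch
# call varies across PaddleHub versions; this follows the legacy Finetune API used above,
# and the hyper-parameters are illustrative.
config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
    inputs["segment_ids"].name, inputs["input_mask"].name, label.name
]
hub.finetune_and_eval(
    task=cls_task, data_reader=reader, feed_list=feed_list, config=config)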