How to use the deepchem.metrics.pearson_r2_score function in deepchem

To help you get started, we’ve selected a few deepchem examples that show popular ways pearson_r2_score is used in public projects.
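Before diving into the excerpts: dc.metrics.pearson_r2_score(y, y_pred) returns the squared Pearson correlation coefficient between true and predicted values, and is typically wrapped in a dc.metrics.Metric so a model's evaluate method can report it. A minimal sketch (the arrays are made-up illustration data):

import numpy as np
import deepchem as dc

y_true = np.array([1.0, 2.0, 3.0, 4.0])  # hypothetical labels
y_pred = np.array([1.1, 1.9, 3.2, 3.8])  # hypothetical predictions

# Direct call on raw arrays
print(dc.metrics.pearson_r2_score(y_true, y_pred))

# Wrapped in a Metric, with np.mean averaging scores across tasks
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)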

github deepchem / deepchem / examples / hopv / hopv_tf_progressive.py
import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv

# Fix the random seed for reproducibility
np.random.seed(123)

# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

model = dc.models.ProgressiveMultitaskRegressor(
    len(hopv_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50)

# Fit the model
model.fit(train_dataset, nb_epoch=25)

print("Evaluating model")
github deepchem / deepchem / examples / pdbbind / pdbbind_tf.py
import os
import numpy as np
import tensorflow as tf
# For stable runs
np.random.seed(123)
tf.set_random_seed(123)

import deepchem as dc
from deepchem.molnet import load_pdbbind_grid

split = "random"
subset = "full"
pdbbind_tasks, pdbbind_datasets, transformers = load_pdbbind_grid(
    split=split, subset=subset)
train_dataset, valid_dataset, test_dataset = pdbbind_datasets

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

current_dir = os.path.dirname(os.path.realpath(__file__))
model_dir = os.path.join(current_dir, "%s_%s_DNN" % (split, subset))

n_features = train_dataset.X.shape[1]
model = dc.models.MultitaskRegressor(
    len(pdbbind_tasks),
    n_features,
    logdir=model_dir,
    dropouts=[.25],
    learning_rate=0.0003,
    weight_init_stddevs=[.1],
    batch_size=64)

# Fit the model
model.fit(train_dataset, nb_epoch=100)
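Evaluation is truncated here too. With a single Metric object, model.evaluate returns a dict keyed by the metric's name; a sketch of the likely next step:

train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
# Each result is a dict, keyed by a name like "pearson_r2_score"
print(train_scores)
print(valid_scores)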
github taneishi / dlvs / chembl / tf_models.py
print("Number of compounds in test set")
print(len(test_dataset))

### Create model ###
n_layers = 2
nb_epoch = 50
model = dc.models.TensorflowMultiTaskRegressor(
    len(chembl_tasks), train_dataset.get_data_shape()[0],
    layer_sizes=[1000]*n_layers, dropouts=[0.25]*n_layers,
    weight_init_stddevs=[0.02]*n_layers,
    bias_init_consts=[1.]*n_layers, learning_rate=0.0008,
    penalty=0.0005, penalty_type="l2", optimizer="adam", batch_size=128,
    seed=123, verbosity="high")

# Use Pearson R^2 as the regression metric
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)

start = timeit.default_timer()

print("Training model")
model.fit(train_dataset, nb_epoch=nb_epoch)

train_time = timeit.default_timer() - start

start = timeit.default_timer()

train_score, train_scores = model.evaluate(train_dataset, [metric], transformers, per_task_metrics=True)
valid_score, valid_scores = model.evaluate(valid_dataset, [metric], transformers, per_task_metrics=True)
test_score, test_scores = model.evaluate(test_dataset, [metric], transformers, per_task_metrics=True)

eval_time = timeit.default_timer() - start
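The excerpt ends after the timing code. A short sketch of how the averaged and per-task results returned by per_task_metrics=True might be reported (the formatting is illustrative):

print("Train time %.1f s, eval time %.1f s" % (train_time, eval_time))
print("Averaged R^2 -- train: %s, valid: %s, test: %s"
      % (train_score, valid_score, test_score))
print("Per-task test R^2: %s" % test_scores)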
github deepchem / deepchem / examples / roitberg / roitberg.py
    for idx in ids:
      new_metadata.append(metadata[idx])

  return new_metadata


if __name__ == "__main__":

  max_atoms = 23
  batch_size = 64  # CHANGED FROM 16
  layer_structures = [128, 128, 64]
  atom_number_cases = [1, 6, 7, 8]

  metric = [
      dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
      dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
  ]

  print("Fitting new model...")

  train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(
      mode="atomization")

  splitter = dc.splits.RandomGroupSplitter(
      broadcast(train_valid_dataset, all_groups))

  print("Performing 1-fold split...")
  train_dataset, valid_dataset = splitter.train_test_split(
      train_valid_dataset, train_dir=train_dir, test_dir=valid_dir)

  transformers = [
      dc.trans.NormalizationTransformer(
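The snippet is cut off inside the NormalizationTransformer constructor. A typical construction for regression targets, offered only as a hedged guess at the elided arguments:

transformers = [
    dc.trans.NormalizationTransformer(
        transform_y=True, dataset=train_dataset)  # assumed arguments
]
for transformer in transformers:
  train_dataset = transformer.transform(train_dataset)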
github deepchem / deepchem / contrib / atomicconv / acnn / refined / opt_scaffold.py
    dropouts=dropouts,
    learning_rate=learning_rate,
    momentum=momentum,
    optimizer="adam",
    batch_size=batch_size,
    conv_layers=1,
    boxsize=None,
    verbose=True,
    seed=seed)

# Fit model
model.fit(train_dataset, nb_epoch=10)

# Evaluate model
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
                                              transformers)
train_scores = train_evaluator.compute_model_performance(metric)
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(metric)
print("Test scores")
print(test_scores)
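The same Evaluator pattern extends to the validation split; a one-line sketch, assuming a valid_dataset is in scope:

valid_evaluator = dc.utils.evaluate.Evaluator(model, valid_dataset, transformers)
print(valid_evaluator.compute_model_performance(metric))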
github deepchem / deepchem / examples / membrane_permeability / membrane_permeability_graph_conv.py
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from membrane_permeability_datasets import load_permeability

# Load membrane permeability dataset
permeability_tasks, permeability_datasets, transformers = load_permeability(
    featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = permeability_datasets

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

n_atom_feat = 75
batch_size = 64

max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

reshard_size = 512
transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
train_dataset.reshard(reshard_size)
train_dataset = transformer.transform(train_dataset)
valid_dataset.reshard(reshard_size)
valid_dataset = transformer.transform(valid_dataset)
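The excerpt ends before the test split is processed; presumably it continues symmetrically (a sketch):

test_dataset.reshard(reshard_size)
test_dataset = transformer.transform(test_dataset)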
github deepchem / deepchem / examples / kaggle / KAGGLE_rf_model.py
import numpy as np
import deepchem as dc
from sklearn.ensemble import RandomForestRegressor

num_trials = 5
print("About to load KAGGLE data.")
KAGGLE_tasks, datasets, transformers = load_kaggle(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets

print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))

num_features = train_dataset.get_data_shape()[0]
print("Num features: %d" % num_features)

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)


def task_model_builder(model_dir):
  sklearn_model = RandomForestRegressor(
      #n_estimators=100, max_features=int(num_features/3),
      n_estimators=1,
      max_features=int(num_features / 3),
      min_samples_split=5,
      n_jobs=-1)
  return dc.models.SklearnModel(sklearn_model, model_dir)


all_results = []
for trial in range(num_trials):
  print("Starting trial %d" % trial)
  model = dc.models.SingletaskToMultitask(KAGGLE_tasks, task_model_builder)
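The trial loop is truncated right after the model is built. A hedged sketch of the fit-and-score body the loop presumably runs:

  model.fit(train_dataset)
  train_scores = model.evaluate(train_dataset, [metric], transformers)
  valid_scores = model.evaluate(valid_dataset, [metric], transformers)
  all_results.append((train_scores, valid_scores))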
github deepchem / deepchem / examples / delaney / delaney_MPNN.py
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='Weave', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

n_atom_feat = 75
n_pair_feat = 14
# Batch size of models
batch_size = 64

model = dc.models.MPNNModel(
    len(delaney_tasks),
    n_atom_feat=n_atom_feat,
    n_pair_feat=n_pair_feat,
    T=3,
    M=5,
    batch_size=batch_size,
    learning_rate=0.0001,
    use_queue=False,
    mode="regression")
github deepchem / deepchem / examples / qm7 / qm7_tf_model.py
from __future__ import unicode_literals

import os
import deepchem as dc
import numpy as np
from deepchem.molnet import load_qm7_from_mat
from deepchem.models.optimizers import ExponentialDecay

np.random.seed(123)
qm7_tasks, datasets, transformers = load_qm7_from_mat(
    split='stratified', move_mean=True)
train_dataset, valid_dataset, test_dataset = datasets
fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

rate = 0.001
model = dc.models.MultitaskFitTransformRegressor(
    n_tasks=1,
    n_features=[23, 23],
    learning_rate=rate,
    momentum=.8,
    batch_size=25,
    weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)],
    bias_init_consts=[0., 0., 0.],
    layer_sizes=[400, 100, 100],
    dropouts=[0.01, 0.01, 0.01],
    fit_transformers=fit_transformers,
    seed=123)
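The otherwise unused ExponentialDecay import suggests the full script decays the learning rate rather than keeping the constant rate above. A hypothetical sketch of that variant (decay factor and step count are assumptions):

# Hypothetical schedule: multiply the rate by 0.95 every 1000 steps
rate = ExponentialDecay(initial_rate=0.001, decay_rate=0.95, decay_steps=1000)

The resulting schedule object would then be passed as learning_rate=rate when constructing the model.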
github deepchem / deepchem / examples / kinase / KINASE_tf_robust.py
import numpy as np
import deepchem as dc

KINASE_tasks, datasets, transformers = load_kinase(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets

print("Number of compounds in train set")
print(len(train_dataset))
print("Number of compounds in validation set")
print(len(valid_dataset))
print("Number of compounds in test set")
print(len(test_dataset))

n_layers = 3
n_bypass_layers = 3
nb_epoch = 50

# Use Pearson R^2 as the regression metric
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)

all_results = []
for trial in range(num_trials):
  model = dc.models.RobustMultitaskRegressor(
      len(KINASE_tasks),
      train_dataset.get_data_shape()[0],
      layer_sizes=[500] * n_layers,
      bypass_layer_sizes=[50] * n_bypass_layers,
      dropouts=[.25] * n_layers,
      bypass_dropouts=[.25] * n_bypass_layers,
      weight_init_stddevs=[.02] * n_layers,
      bias_init_consts=[.5] * n_layers,
      bypass_weight_init_stddevs=[.02] * n_bypass_layers,
      bypass_bias_init_consts=[.5] * n_bypass_layers,
      learning_rate=.0003,
      weight_decay_penalty=.0001,
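The final excerpt is cut off mid-constructor. A hedged guess at how it closes out and proceeds to the usual fit-and-evaluate cycle (everything past this point is an assumption):

      weight_decay_penalty_type="l2",  # assumed closing arguments
      seed=123)

  model.fit(train_dataset, nb_epoch=nb_epoch)
  all_results.append(model.evaluate(valid_dataset, [metric], transformers))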