How to use deepchem - 10 common examples

To help you get started, we’ve selected ten deepchem examples based on popular ways the library is used in public projects.
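
All ten examples share the same core workflow: load a featurized dataset, fit a model, and evaluate it with one or more metrics. As a quick orientation, here is a minimal sketch of that workflow; the Delaney loader and MultitaskRegressor are illustrative choices, not taken from the examples that follow.

import numpy as np
import deepchem as dc

# Load a MoleculeNet benchmark (Delaney aqueous solubility, as an illustration)
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='ECFP')
train_dataset, valid_dataset, test_dataset = datasets

# Fit a simple multitask regressor on 1024-bit circular fingerprints
model = dc.models.MultitaskRegressor(len(tasks), n_features=1024)
model.fit(train_dataset, nb_epoch=10)

# Score the validation split with Pearson R^2
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
print(model.evaluate(valid_dataset, [metric], transformers))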


From deepchem/deepchem on GitHub: deepchem/data/test_data_loader.py
import os
import tempfile

import deepchem as dc

# A test method from a unittest.TestCase in deepchem's test suite; imports
# added here so the snippet is self-contained.
def test_load_singleton_csv(self):
    # Write a one-row CSV with a SMILES column and a single task column
    fin = tempfile.NamedTemporaryFile(mode='w', delete=False)
    fin.write("smiles,endpoint\nc1ccccc1,1")
    fin.close()
    print(fin.name)
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["endpoint"]
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)

    # Featurizing a one-row CSV should yield a dataset of length 1
    X = loader.featurize(fin.name)
    self.assertEqual(1, len(X))
    os.remove(fin.name)

From deepchem/deepchem on GitHub: examples/chembl/chembl_graph_conv.py
import numpy as np

np.random.seed(123)
import tensorflow as tf

tf.set_random_seed(123)
import deepchem as dc
from deepchem.models import GraphConvModel
from deepchem.molnet import load_chembl

# Load ChEMBL dataset
chembl_tasks, datasets, transformers = load_chembl(
    shard_size=2000, featurizer="GraphConv", set="5thresh", split="random")
train_dataset, valid_dataset, test_dataset = datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128

model = GraphConvModel(
    len(chembl_tasks), batch_size=batch_size, mode='regression')

# Fit trained model
model.fit(train_dataset, nb_epoch=20)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Train scores:", train_scores)
print("Valid scores:", valid_scores)

From deepchem/deepchem on GitHub: examples/qm8/qm8_ANI.py
import deepchem as dc

# This excerpt assumes the QM8 dataset was loaded earlier in the full script,
# e.g.:
#   tasks, datasets, transformers = dc.molnet.load_qm8()
#   train_dataset, valid_dataset, test_dataset = datasets

# Maximum number of atoms per molecule
max_atoms = 26
# Batch size of models
batch_size = 128
layer_structures = [128, 128, 64]
# Atomic numbers the network handles (H, C, N, O, F)
atom_number_cases = [1, 6, 7, 8, 9]

ANItransformer = dc.trans.ANITransformer(
    max_atoms=max_atoms, atom_cases=atom_number_cases)
train_dataset = ANItransformer.transform(train_dataset)
valid_dataset = ANItransformer.transform(valid_dataset)
test_dataset = ANItransformer.transform(test_dataset)
n_feat = ANItransformer.get_num_feats() - 1

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

model = dc.models.ANIRegression(
    len(tasks),
    max_atoms,
    n_feat,
    layer_structures=layer_structures,
    atom_number_cases=atom_number_cases,
    batch_size=batch_size,
    learning_rate=0.001,
    use_queue=False,
    mode="regression")

# Fit trained model
model.fit(train_dataset, nb_epoch=300, checkpoint_interval=100)
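
The excerpt stops after training. A minimal sketch of evaluating the fitted model on the transformed splits, mirroring the evaluation pattern used in the other examples (`transformers` is the list returned by the load_qm8 call sketched at the top of this example):

# Evaluate with the metric list defined above
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
print("Train scores:", train_scores)
print("Valid scores:", valid_scores)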

From deepchem/deepchem on GitHub: examples/hopv/hopv_tf_progressive.py
import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv

# Fix the random seed (debugging/reproducibility only!)
np.random.seed(123)

# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

model = dc.models.ProgressiveMultitaskRegressor(
    len(hopv_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50)

# Fit trained model
model.fit(train_dataset, nb_epoch=25)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
print("Train scores:", train_scores)
print("Valid scores:", valid_scores)

From deepchem/deepchem on GitHub: examples/qm7/qm7_tensorgraph_GraphConv.py
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load QM7 dataset
tasks, datasets, transformers = dc.molnet.load_qm7_from_mat(
    featurizer='GraphConv', move_mean=True)
train_dataset, valid_dataset, test_dataset = datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

# Batch size of models
batch_size = 64

model = dc.models.GraphConvModel(
    len(tasks), batch_size=batch_size, learning_rate=0.001, mode="regression")

# Fit trained model
model.fit(train_dataset, nb_epoch=50)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
print("Train scores:", train_scores)
print("Valid scores:", valid_scores)

From deepchem/deepchem on GitHub: contrib/one_shot_models/examples/tox_attn_one_fold.py
import deepchem as dc

# n_pos is defined earlier in the full script; the value 1 is assumed here so
# the snippet is self-contained
n_pos = 1
n_neg = 1
# Set batch sizes for network
test_batch_size = 128
support_batch_size = n_pos + n_neg
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
learning_rate = 1e-4
log_every_n_samples = 50
# Number of folds for the task-level split; also defined earlier in the full
# script (value assumed)
K = 4
# Number of features on conv-mols
n_feat = 75

# load_tox21_convmol is a dataset helper defined alongside this contrib example
tox21_tasks, dataset, transformers = load_tox21_convmol()

# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")

task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)

train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]

# Train support model on train
support_model = dc.nn.SequentialSupportGraph(n_feat)

# Add layers
support_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.GraphConv(128, 64, activation='relu'))
support_model.add(dc.nn.GraphPool())
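
The excerpt ends while the embedding layers are still being stacked. In the companion one-shot scripts, the support network is then wrapped in an episodic classifier and trained; the sketch below shows that step under the assumption that the contrib-era SupportGraphClassifier wrapper and its keyword names apply (both are assumptions, not confirmed by this excerpt).

# Assumed contrib API: wrap the support network in an episodic classifier
model = dc.models.SupportGraphClassifier(
    support_model,
    test_batch_size=test_batch_size,
    support_batch_size=support_batch_size,
    learning_rate=learning_rate)
# Episodic training over n_pos/n_neg support sets (keyword names assumed)
model.fit(
    train_dataset,
    nb_epochs=nb_epochs,
    n_episodes_per_epoch=n_train_trials,
    n_pos=n_pos,
    n_neg=n_neg,
    log_every_n_samples=log_every_n_samples)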

From deepchem/deepchem on GitHub: contrib/atomicconv/acnn/core/opt_stratified.py
# (This excerpt begins partway through the model constructor; the opening
# `model = ...(` line naming the model class is omitted upstream.)
    bias_init_consts=[0., 0., 0.],
    penalty=penalty,
    penalty_type=penalty_type,
    dropouts=dropouts,
    learning_rate=0.002,
    momentum=0.8,
    optimizer="adam",
    batch_size=64,
    conv_layers=1,
    boxsize=None,
    verbose=True,
    seed=seed)
model.fit(train_dataset, nb_epoch=100)
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
                                              transformers)
train_scores = train_evaluator.compute_model_performance(
    metric,
    csv_out="train_predict_ac_stratified.csv",
    stats_out="train_stats_ac_stratified.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
    metric,
    csv_out="test_predict_ac_stratified.csv",
    stats_out="test_stats_ac_stratified.csv")
print("Test scores")
print(test_scores)

From deepchem/deepchem on GitHub: contrib/atomicconv/acnn/core/opt_scaffold.py
# (This excerpt begins partway through the model constructor; the opening
# `model = ...(` line naming the model class is omitted upstream.)
    bias_init_consts=[0., 0., 0.],
    penalty=penalty,
    penalty_type=penalty_type,
    dropouts=dropouts,
    learning_rate=0.002,
    momentum=0.8,
    optimizer="adam",
    batch_size=64,
    conv_layers=1,
    boxsize=None,
    verbose=True,
    seed=seed)
model.fit(train_dataset, nb_epoch=100)
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]
train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset,
                                              transformers)
train_scores = train_evaluator.compute_model_performance(
    metric,
    csv_out="train_predict_ac_scaffold.csv",
    stats_out="train_stats_ac_scaffold.csv")
print("Train scores")
print(train_scores)
test_evaluator = dc.utils.evaluate.Evaluator(model, test_dataset, transformers)
test_scores = test_evaluator.compute_model_performance(
    metric,
    csv_out="test_predict_ac_scaffold.csv",
    stats_out="test_stats_ac_scaffold.csv")
print("Test scores")
print(test_scores)

From deepchem/deepchem on GitHub: examples/muv/muv_tf.py
import os
import numpy as np
import shutil
import deepchem as dc
from deepchem.molnet import load_muv

np.random.seed(123)

# Load MUV data
muv_tasks, muv_datasets, transformers = load_muv()
train_dataset, valid_dataset, test_dataset = muv_datasets

# Build model
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

rate = dc.models.optimizers.ExponentialDecay(0.001, 0.8, 1000)
model = dc.models.MultitaskClassifier(
    len(muv_tasks),
    n_features=1024,
    dropouts=[.25],
    learning_rate=rate,
    weight_init_stddevs=[.1],
    batch_size=64,
    verbosity="high")

# Fit trained model
model.fit(train_dataset)

# Evaluate train/valid scores
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Train scores:", train_scores)
print("Valid scores:", valid_scores)

From deepchem/deepchem on GitHub: examples/kinase/KINASE_tf_model.py
import numpy as np
import deepchem as dc

# This excerpt assumes the KINASE dataset was prepared earlier in the script:
# KINASE_tasks, the train/valid/test datasets, transformers, and num_trials.
all_results = []
for trial in range(num_trials):
  ### Create model ###
  n_layers = 3
  nb_epoch = 50
  model = dc.models.TensorflowMultiTaskRegressor(
      len(KINASE_tasks), train_dataset.get_data_shape()[0],
      layer_sizes=[1000]*n_layers, dropouts=[.25]*n_layers,
      weight_init_stddevs=[.02]*n_layers,
      bias_init_consts=[.5]*n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      verbosity="high")

  # Use Pearson R^2 (a regression metric), averaged across tasks
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)

  print("Training model")
  model.fit(train_dataset, nb_epoch=nb_epoch, max_checkpoints_to_keep=1)

  print("Evaluating models")
  train_score, train_task_scores = model.evaluate(
      train_dataset, [metric], transformers, per_task_metrics=True)
  valid_score, valid_task_scores = model.evaluate(
      valid_dataset, [metric], transformers, per_task_metrics=True)
  test_score, test_task_scores = model.evaluate(
      test_dataset, [metric], transformers, per_task_metrics=True)

  all_results.append((train_score, train_task_scores,
                      valid_score, valid_task_scores,
                      test_score, test_task_scores))
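
The loop collects one tuple of scores per trial, and the excerpt ends before they are summarized. One way to aggregate the validation scores across trials, as a sketch; the "mean-pearson_r2_score" key is an assumption about how deepchem names this metric in the returned score dict:

# Hypothetical aggregation across trials (the score-dict key is assumed)
valid_r2 = [valid_score["mean-pearson_r2_score"]
            for (_, _, valid_score, _, _, _) in all_results]
print("Valid R^2: %.3f +/- %.3f" % (np.mean(valid_r2), np.std(valid_r2)))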