Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Test NGram fitting with train_order=5 and proposal behavior under warnings.
# NOTE(review): this snippet has lost its indentation and is truncated: the
# final `with` statement below has no visible body.
def test_ngram_2(data):
ngram = NGram()
# Fitting on the first 20 entries of data['pg'][0] is expected to emit a
# RuntimeWarning (match='' accepts any message).
with pytest.warns(RuntimeWarning, match=''):
ngram.fit(data['pg'][0][:20], train_order=5)
# After fitting, both the train order and the sample order are expected to be 5
# and the n-gram table must exist.
assert ngram._train_order == 5
assert ngram.sample_order == 5
assert ngram.ngram_table is not None
# Seed NumPy's global RNG before calling proposal.
np.random.seed(123456)
# proposal is expected to warn with 'can not convert' for this SMILES, and the
# test asserts the returned list equals the input list.
with pytest.warns(RuntimeWarning, match='can not convert'):
old_smis = ['CC(=S)C([*])(C)=CCC([*])']
tmp = ngram.proposal(old_smis)
assert tmp == old_smis
np.random.seed(654321)
# NOTE(review): body of this `with` is missing from the visible source.
with pytest.warns(RuntimeWarning, match='get_prob: '):
def test_iqspr_1(data):
    """Run IQSPR on five seed SMILES and check every yielded step.

    A GaussianLogLikelihood estimator (with an ECFP descriptor) is fitted on
    the full ``data['pg']`` set and an NGram modifier on its first 20 SMILES.
    The sampler is then iterated over a 10-step ``beta`` schedule, and for
    each yielded tuple the test checks that ``p`` sums to 1 (within 1e-5)
    and that ``f`` sums to 5.

    Parameters
    ----------
    data: dict
        Test data; ``data['pg']`` is unpacked as ``(X, y)``.
    """
    np.random.seed(0)
    ecfp = ECFP(n_jobs=1, input_type='smiles')
    bre = GaussianLogLikelihood(descriptor=ecfp)
    ngram = NGram()
    iqspr = IQSPR(estimator=bre, modifier=ngram)

    X, y = data['pg']
    bre.fit(X, y)
    ngram.fit(X[0:20], train_order=10)

    beta = np.linspace(0.05, 1, 10)
    steps = iqspr(X[:5], beta, yield_lpf=True, bandgap=(0.1, 0.2), density=(0.9, 1.2))
    # Only p and f are checked; the first two yielded items are unused here.
    for _s, _ll, p, f in steps:
        assert np.abs(np.sum(p) - 1.0) < 1e-5
        # Use f itself as the failure message: print(f) evaluates to None, so
        # the AssertionError would otherwise carry no useful information.
        assert np.sum(f) == 5, f
# Test that a user-supplied on_errors handler replaces NGram's error handling.
# NOTE(review): this snippet has lost its indentation and is truncated: the
# second on_errors definition below is cut off after its `if` line.
def test_ngram_3(data):
ngram = NGram(sample_order=5)
ngram.fit(data['pg'][0][:20], train_order=5)
# Handler that re-raises MolConvertError and, for any other error, returns
# the error's old_smi attribute.
def on_errors(self, error):
if isinstance(error, MolConvertError):
raise error
else:
return error.old_smi
np.random.seed(123456)
# Bind the handler to this instance so it is called with `self`.
ngram.on_errors = types.MethodType(on_errors, ngram)
# With the handler installed, proposal on this SMILES is expected to raise
# MolConvertError.
with pytest.raises(MolConvertError):
old_smis = ['CC(=S)C([*])(C)=CCC([*])']
ngram.proposal(old_smis)
# NOTE(review): second handler (for GetProbError) is incomplete in the
# visible source.
def on_errors(self, error):
if isinstance(error, GetProbError):
def data():
    """Yield the shared objects used by the tests in this module.

    Loads ``polymer_test_data.csv`` from the directory of this file, builds
    an ECFP descriptor, a GaussianLogLikelihood estimator, an NGram modifier
    and an IQSPR sampler, and yields them in a dict under the keys ``ecfp``,
    ``bre``, ``ngram``, ``iqspr`` and ``pg`` (a ``(X, y)`` tuple).
    Prints a message before setup and after the consumer resumes it.

    NOTE(review): presumably decorated as a pytest fixture; the decorator is
    not visible here.
    """
    # ignore numpy warning
    import warnings

    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    frame = pd.read_csv(str(csv_path))
    smiles = frame['smiles']
    # everything except the SMILES column and the leftover index column
    props = frame.drop(['smiles', 'Unnamed: 0'], axis=1)

    descriptor = ECFP(n_jobs=1, input_type='smiles')
    likelihood = GaussianLogLikelihood(descriptor=descriptor)
    modifier = NGram()
    sampler = IQSPR(estimator=likelihood, modifier=modifier)

    # prepare test data
    yield {
        'ecfp': descriptor,
        'bre': likelihood,
        'ngram': modifier,
        'iqspr': sampler,
        'pg': (smiles, props),
    }

    print('test over')
def test_ngram_1(data):
    """Check NGram's default parameters and the effect of ``set_params``.

    A fresh NGram must have no n-gram table and no train order, with the
    expected defaults for ``max_len``, ``del_range``, ``reorder_prob`` and
    ``sample_order``. After ``set_params(max_len=500, reorder_prob=0.2)``
    those two values must change while ``del_range`` stays the same.
    """
    modifier = NGram()

    # state of a freshly constructed, unfitted instance
    assert modifier.ngram_table is None
    assert modifier.max_len == 1000
    assert modifier.del_range == (1, 10)
    assert modifier.reorder_prob == 0
    assert modifier.sample_order == 10
    assert modifier._train_order is None

    # update two parameters; del_range was not passed and must be untouched
    modifier.set_params(max_len=500, reorder_prob=0.2)
    assert modifier.max_len == 500
    assert modifier.del_range == (1, 10)
    assert modifier.reorder_prob == 0.2
Combine two SMILES strings using '*' as connection points. Note that '-*', '=*', and '#*' are not yet handled properly.
Parameters
----------
smis_base: str
SMILES for combining.
If no '*', assume connection point at the end.
If more than one '*', the first will be picked if it's not the 1st character.
smis_frag: str
SMILES for combining.
If no '*', assume connection point at the front.
If more than one '*', the first will be picked.
"""
# prepare NGram object for use of ext. SMILES
ngram = NGram()
# check position of '*'
mols_base = Chem.MolFromSmiles(smis_base)
if mols_base is None:
raise RuntimeError('Invalid base SMILES!')
idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']
# rearrange base SMILES to avoid 1st char = '*' (assume no '**')
if len(idx_base) == 1 and idx_base[0] == 0:
smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
elif len(idx_base) == 0:
smis_base_head = smis_base + '*'
else:
smis_base_head = smis_base
# convert base to ext. SMILES and pick insertion location
"""
combine two SMILES strings with '*' as connection points
Parameters
----------
smis_base: str
SMILES for combining.
If no '*', assume connection point at the end.
If more than one '*', the first will be picked if it's not the 1st character.
smis_frag: str
SMILES for combining.
If no '*', assume connection point at the front.
If more than one '*', the first will be picked.
"""
# prepare NGram object for use of ext. SMILES
ngram = NGram()
# check position of '*'
mols_base = Chem.MolFromSmiles(smis_base)
if mols_base is None:
raise RuntimeError('Invalid base SMILES!')
idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']
# rearrange base SMILES to avoid 1st char = '*'
if len(idx_base) == 1 and idx_base[0] == 0:
smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
elif len(idx_base) == 0:
smis_base_head = smis_base + '*'
else:
smis_base_head = smis_base
# convert base to ext. SMILES and pick insertion location
"""
combine two SMILES strings with '*' as connection points
Parameters
----------
smis_base: str
SMILES for combining.
If no '*', assume connection point at the end.
If more than one '*', the first will be picked if it's not the 1st character.
smis_frag: str
SMILES for combining.
If no '*', assume connection point at the front.
If more than one '*', the first will be picked.
"""
# prepare NGram object for use of ext. SMILES
ngram = NGram()
# check position of '*'
mols_base = Chem.MolFromSmiles(smis_base)
if mols_base is None:
raise RuntimeError('Invalid base SMILES!')
idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']
# rearrange base SMILES to avoid 1st char = '*'
if len(idx_base) == 1 and idx_base[0] == 0:
smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
elif len(idx_base) == 0:
smis_base_head = smis_base + '*'
else:
smis_base_head = smis_base
# convert base to ext. SMILES and pick insertion location