How to use the xenonpy.inverse.iqspr.NGram class in xenonpy

To help you get started, we’ve selected a few xenonpy examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yoshida-lab / XenonPy / tests / inverse / test_iqspr.py View on Github external
def test_ngram_2(data):
    ngram = NGram()

    with pytest.warns(RuntimeWarning, match=''):
        ngram.fit(data['pg'][0][:20], train_order=5)

    assert ngram._train_order == 5
    assert ngram.sample_order == 5
    assert ngram.ngram_table is not None

    np.random.seed(123456)
    with pytest.warns(RuntimeWarning, match='can not convert'):
        old_smis = ['CC(=S)C([*])(C)=CCC([*])']
        tmp = ngram.proposal(old_smis)
        assert tmp == old_smis

    np.random.seed(654321)
    with pytest.warns(RuntimeWarning, match='get_prob: '):
github yoshida-lab / XenonPy / tests / inverse / test_iqspr.py View on Github external
def test_iqspr_1(data):
    """Run IQSPR from five starting SMILES and check every yielded step.

    The estimator is fitted on the full ``data['pg']`` pair and the NGram
    modifier on the first 20 SMILES with ``train_order=10``.  For each tuple
    yielded by ``iqspr(...)`` the test asserts that ``p`` sums to 1 (within
    1e-5) and that ``f`` sums to 5, the number of starting SMILES passed in.

    Parameters
    ----------
    data: dict
        Mapping whose ``'pg'`` entry unpacks into ``(X, y)``.
    """
    # fixed seed so the run is repeatable
    np.random.seed(0)
    ecfp = ECFP(n_jobs=1, input_type='smiles')
    bre = GaussianLogLikelihood(descriptor=ecfp)
    ngram = NGram()
    iqspr = IQSPR(estimator=bre, modifier=ngram)
    X, y = data['pg']
    bre.fit(X, y)
    # X is data['pg'][0]; reuse it instead of indexing the mapping again
    ngram.fit(X[0:20], train_order=10)
    beta = np.linspace(0.05, 1, 10)
    # the first two yielded items are not checked by this test
    for _s, _ll, p, f in iqspr(X[:5], beta, yield_lpf=True, bandgap=(0.1, 0.2), density=(0.9, 1.2)):
        assert np.abs(np.sum(p) - 1.0) < 1e-5
        # a real message: the former `print(f)` evaluated to None, so the
        # AssertionError itself carried no information
        assert np.sum(f) == 5, f'expected f to sum to 5, got {f}'
github yoshida-lab / XenonPy / tests / inverse / test_iqspr.py View on Github external
def test_ngram_3(data):
    ngram = NGram(sample_order=5)
    ngram.fit(data['pg'][0][:20], train_order=5)

    def on_errors(self, error):
        if isinstance(error, MolConvertError):
            raise error
        else:
            return error.old_smi

    np.random.seed(123456)
    ngram.on_errors = types.MethodType(on_errors, ngram)
    with pytest.raises(MolConvertError):
        old_smis = ['CC(=S)C([*])(C)=CCC([*])']
        ngram.proposal(old_smis)

    def on_errors(self, error):
        if isinstance(error, GetProbError):
github yoshida-lab / XenonPy / tests / inverse / test_iqspr.py View on Github external
def data():
    """Yield the shared objects and polymer data used by the tests.

    Silences two NumPy "size changed" warnings, reads
    ``polymer_test_data.csv`` from the directory of this file, and yields a
    dict with the keys ``ecfp``, ``bre``, ``ngram``, ``iqspr`` and ``pg``
    (the ``(X, y)`` pair: the ``smiles`` column and the remaining columns
    without ``'Unnamed: 0'``).  After the consumer resumes the generator a
    closing message is printed.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible in this excerpt.
    """
    # ignore numpy warning
    import warnings
    print('ignore NumPy RuntimeWarning\n')
    for noisy_message in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=noisy_message)

    # the CSV lives next to this file
    csv_path = Path(__file__).parent / 'polymer_test_data.csv'
    frame = pd.read_csv(str(csv_path))

    smiles = frame['smiles']
    targets = frame.drop(['smiles', 'Unnamed: 0'], axis=1)

    descriptor = ECFP(n_jobs=1, input_type='smiles')
    estimator = GaussianLogLikelihood(descriptor=descriptor)
    modifier = NGram()
    sampler = IQSPR(estimator=estimator, modifier=modifier)

    # prepare test data
    yield {
        'ecfp': descriptor,
        'bre': estimator,
        'ngram': modifier,
        'iqspr': sampler,
        'pg': (smiles, targets),
    }

    print('test over')
github yoshida-lab / XenonPy / tests / inverse / test_iqspr.py View on Github external
def test_ngram_1(data):
    """Check NGram's attribute values after construction and after ``set_params``.

    A fresh ``NGram()`` must have no n-gram table and no train order, plus
    the default values asserted below.  After
    ``set_params(max_len=500, reorder_prob=0.2)`` the two changed parameters
    must read back and ``del_range`` must be unchanged.
    """
    model = NGram()

    # state right after construction
    assert model.ngram_table is None
    initial_values = (
        ('max_len', 1000),
        ('del_range', (1, 10)),
        ('reorder_prob', 0),
        ('sample_order', 10),
    )
    for attr_name, expected in initial_values:
        assert getattr(model, attr_name) == expected
    assert model._train_order is None

    model.set_params(max_len=500, reorder_prob=0.2)

    # only the two parameters passed to set_params should differ now
    updated_values = (
        ('max_len', 500),
        ('del_range', (1, 10)),
        ('reorder_prob', 0.2),
    )
    for attr_name, expected in updated_values:
        assert getattr(model, attr_name) == expected
github yoshida-lab / XenonPy / xenonpy / contrib / sample_codes / combine_fragments / combine_fragments.py View on Github external
combine two SMILES strings with '*' as connection points, note that no proper treatment for '-*', '=*', or '#*' yet.

    Parameters
    ----------
    smis_base: str
        SMILES for combining.
        If no '*', assume connection point at the end.
        If more than one '*', the first will be picked if it's not the 1st character.
    smis_frag: str
        SMILES for combining.
        If no '*', assume connection point at the front.
        If more than one '*', the first will be picked.
    """

    # prepare NGram object for use of ext. SMILES
    ngram = NGram()

    # check position of '*'
    mols_base = Chem.MolFromSmiles(smis_base)
    if mols_base is None:
        raise RuntimeError('Invalid base SMILES!')
    idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']

    # rearrange base SMILES to avoid 1st char = '*' (assume no '**')
    if len(idx_base) == 1 and idx_base[0] == 0:
        smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
    elif len(idx_base) == 0:
        smis_base_head = smis_base + '*'
    else:
        smis_base_head = smis_base

    # converge base to ext. SMILES and pick insertion location
github yoshida-lab / XenonPy / xenonpy / contrib / sample_codes / iQSPR_V / iQSPR_VF.py View on Github external
"""
        combine two SMILES strings with '*' as connection points
        Parameters
        ----------
        smis_base: str
            SMILES for combining.
            If no '*', assume connection point at the end.
            If more than one '*', the first will be picked if it's not the 1st character.
        smis_frag: str
            SMILES for combining.
            If no '*', assume connection point at the front.
            If more than one '*', the first will be picked.
        """

        # prepare NGram object for use of ext. SMILES
        ngram = NGram()

        # check position of '*'
        mols_base = Chem.MolFromSmiles(smis_base)
        if mols_base is None:
            raise RuntimeError('Invalid base SMILES!')
        idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']

        # rearrange base SMILES to avoid 1st char = '*'
        if len(idx_base) == 1 and idx_base[0] == 0:
            smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
        elif len(idx_base) == 0:
            smis_base_head = smis_base + '*'
        else:
            smis_base_head = smis_base

        # converge base to ext. SMILES and pick insertion location
github yoshida-lab / XenonPy / xenonpy / contrib / sample_codes / iQSPR_V / iQSPR_F.py View on Github external
"""
        combine two SMILES strings with '*' as connection points
        Parameters
        ----------
        smis_base: str
            SMILES for combining.
            If no '*', assume connection point at the end.
            If more than one '*', the first will be picked if it's not the 1st character.
        smis_frag: str
            SMILES for combining.
            If no '*', assume connection point at the front.
            If more than one '*', the first will be picked.
        """

        # prepare NGram object for use of ext. SMILES
        ngram = NGram()

        # check position of '*'
        mols_base = Chem.MolFromSmiles(smis_base)
        if mols_base is None:
            raise RuntimeError('Invalid base SMILES!')
        idx_base = [i for i in range(mols_base.GetNumAtoms()) if mols_base.GetAtomWithIdx(i).GetSymbol() == '*']

        # rearrange base SMILES to avoid 1st char = '*'
        if len(idx_base) == 1 and idx_base[0] == 0:
            smis_base_head = Chem.MolToSmiles(mols_base,rootedAtAtom=1)
        elif len(idx_base) == 0:
            smis_base_head = smis_base + '*'
        else:
            smis_base_head = smis_base

        # converge base to ext. SMILES and pick insertion location