How to use the mathy.agent.training.mcts.MCTS class in mathy

To help you get started, we’ve selected a few mathy examples based on popular ways MCTS is used in public projects.
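Across the excerpts below, the pattern is the same: build an MCTS around an environment and a model, wrap it in an ActorMCTS, and step until the episode produces a result. Here is a minimal sketch of that loop; the ActorMCTS import path and the positional argument order (epsilon, then rollout count, as in main.py) are assumptions taken from the excerpts, and `env`/`model` stand in for an initialized mathy environment and a started MathModel:

from mathy.agent.training.actor_mcts import ActorMCTS  # import path assumed
from mathy.agent.training.mcts import MCTS

env_state, complexity = env.get_initial_state()
mcts = MCTS(env, model, 0.9, 250)  # epsilon=0.9, num_rollouts=250, as in main.py's training branch
actor = ActorMCTS(mcts, 10)  # allow 10 exploratory moves before greedy play
final_result = None
time_steps = []
while final_result is None:
    env_state, train_example, final_result = actor.step(env, env_state, model, time_steps)
episode_examples, episode_reward, is_win = final_result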


github justindujardin / mathy / main.py
            env_state, prob = mathy_env.get_initial_state(options)

            # Configure MCTS options for train/eval
            if eval_run:
                num_rollouts = 500
                num_exploration_moves = 0
                epsilon = 0.0
            else:
                num_rollouts = 250
                num_exploration_moves = int(mathy_env.max_moves * 0.8)
                epsilon = 0.9

            # Execute episode
            model = mathy_eval if eval_run else mathy
            mcts = MCTS(mathy_env, model, epsilon, num_rollouts)
            actor = ActorMCTS(mcts, num_exploration_moves)
            final_result = None
            time_steps: List[deprecated_MathyEnvObservation] = []
            episode_steps = 0
            start = time.time()
            while final_result is None:
                episode_steps = episode_steps + 1
                env_state, train_example, final_result = actor.step(
                    mathy_env, env_state, model, time_steps
                )

            elapsed = time.time() - start
            episode_examples, episode_reward, is_win = final_result
            lesson_experience_count += len(episode_examples)
            lesson_problem_count += 1
            if is_win:
                ...  # (excerpt truncated here)
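The train/eval branch above is easy to lift into a small helper. This sketch only restates the values from the excerpt; the function name is hypothetical:

def mcts_episode_config(eval_run: bool, max_moves: int):
    """Return (num_rollouts, num_exploration_moves, epsilon) as configured above."""
    if eval_run:
        # Evaluation: more rollouts, no exploration, deterministic action choice
        return 500, 0, 0.0
    # Training: fewer rollouts, exploratory moves for 80% of the move budget
    return 250, int(max_moves * 0.8), 0.9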
github justindujardin / mathy / gym_env.py
from typing import Optional

import gym
import numpy as np
import plac
import tensorflow as tf
from gym.envs.registration import register

from mathy.agent.controller import MathModel
from mathy.agent.training.mcts import MCTS
from mathy.gym import MathyGymEnv
from mathy.a3c import A3CAgent, A3CArgs

__mcts: Optional[MCTS] = None
__model: Optional[MathModel] = None
__agent: Optional[A3CAgent] = None


def mathy_load_model(gym_env: MathyGymEnv):
    global __model
    if __model is None:
        import os

        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "5"
        tf.compat.v1.logging.set_verbosity("CRITICAL")
        __model = MathModel(gym_env.mathy.action_size, "agents/ablated")
        __model.start()


def mathy_free_model():
    global __model
    if __model is not None:
        __model.stop()  # assumed counterpart to MathModel.start() above
        __model = None
github justindujardin / mathy / mathy / agent / training / practice_runner.py
This function executes one episode.
        As the game is played, each turn is added as a training example to
        episode_history. The episode continues until get_state_value returns a
        non-zero value, then that outcome is used to assign a value to each
        example in episode_history.
        """
        if game is None:
            raise NotImplementedError("PracticeRunner.get_game returned None type")
        if predictor is None:
            raise NotImplementedError("PracticeRunner.get_predictor returned None type")

        env_state, complexity = game.get_initial_state()

        episode_history = []
        move_count = 0
        mcts = MCTS(game, predictor, self.config.cpuct, self.config.num_mcts_sims)
        actor = ActorMCTS(mcts, self.config.num_exploration_moves)
        while True:
            move_count += 1
            env_state, result = actor.step(game, env_state, predictor, episode_history)
            if result is not None:
                return result + (complexity,)
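Note that here the third positional argument is cpuct (the PUCT exploration constant) rather than epsilon as in main.py, and the values come from a self.config object. A hypothetical config carrying the three fields the runner reads might look like this; the field names come from the excerpt, the defaults are illustrative:

from dataclasses import dataclass

@dataclass
class PracticeConfig:
    cpuct: float = 1.0               # PUCT exploration constant
    num_mcts_sims: int = 250         # MCTS rollouts per move
    num_exploration_moves: int = 10  # moves sampled stochastically before greedy play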
github justindujardin / mathy / gym_env.py
def mcts_start_problem(gym_env: MathyGymEnv):
    global __mcts, __model
    num_rollouts = 500
    epsilon = 0.0
    mathy_load_model(gym_env)
    assert __model is not None
    __mcts = MCTS(
        env=gym_env.mathy,
        model=__model,
        cpuct=0.0,
        num_mcts_sims=num_rollouts,
        epsilon=epsilon,
    )
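A hedged usage sketch for the function above: the environment id is illustrative (it assumes one of the register() calls elsewhere in gym_env.py), and .unwrapped recovers the underlying MathyGymEnv from gym's wrapper:

env = gym.make("mathy-poly-easy-v0")  # env id is an assumption, not from the excerpt
mcts_start_problem(env.unwrapped)     # lazily loads the model, then builds the MCTS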
github justindujardin / mathy / exam.py
        print("\n[exam] {} - {}...".format(plan.name.lower(), lesson.name.lower()))
        # Fill up a certain amount of experience per problem type
        lesson_experience_count = 0
        if lesson.num_observations is not None:
            iter_experience = lesson.num_observations
        else:
            iter_experience = short_term_size
        while lesson_experience_count < iter_experience:
            env_state, complexity = controller.get_initial_state(print_problem=False)
            complexity_value = complexity * 3
            controller.max_moves = (
                lesson.max_turns if lesson.max_turns is not None else complexity_value
            )
            # generate a new problem now that we've set the max_turns
            env_state, complexity = controller.get_initial_state()
            mcts = MCTS(controller, model, epsilon, mcts_sims)
            actor = ActorMCTS(mcts, num_exploration_moves)
            final_result = None
            time_steps = []
            episode_steps = 0
            start = time.time()
            while final_result is None:
                episode_steps = episode_steps + 1
                env_state, train_example, final_result = actor.step(
                    controller, env_state, model, time_steps
                )

            elapsed = time.time() - start
            episode_examples, episode_reward, is_win = final_result
            lesson_experience_count += len(episode_examples)
            if is_win:
                num_solved = num_solved + 1
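One caution across these excerpts: the positional MCTS(...) calls disagree on the third argument (epsilon in main.py and exam.py, cpuct in practice_runner.py). The keyword form used in gym_env.py sidesteps the ambiguity; the values below are illustrative:

mcts = MCTS(env=mathy_env, model=model, cpuct=1.0, num_mcts_sims=250, epsilon=0.9)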