How to use the mathy.util.GameRewards class in mathy

To help you get started, we've selected a few GameRewards examples based on popular ways it is used in public projects.

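The examples below treat GameRewards as a small set of named reward constants (WIN, LOSE, HELPFUL_MOVE, PREVIOUS_LOCATION, TIMESTEP) that the environment hands back with each step. For orientation, here is a minimal sketch of how those constants might be consumed when picking a single step's reward; the helper function and outcome flags are hypothetical, and only the constant names are taken from the examples below.

from mathy.util import GameRewards

def reward_for_outcome(solved: bool, out_of_moves: bool, revisited: bool) -> float:
    # Hypothetical helper: choose one GameRewards constant per step, mirroring
    # the ordering used in the math_game.py examples below.
    if solved:
        return GameRewards.WIN  # terminal win (often combined with a time bonus)
    if out_of_moves:
        return GameRewards.LOSE  # terminal loss when the move budget runs out
    if revisited:
        return GameRewards.PREVIOUS_LOCATION  # penalty for revisiting a prior state
    return GameRewards.TIMESTEP  # small per-step penalty for any other move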

Example from justindujardin/mathy, mathy/math_game.py (view on GitHub):
        return time_step.termination(features, self.get_win_signal(env_state))

        # Check the turn count last because if the previous move that incremented
        # the turn over the count resulted in a win-condition, we want it to be honored.
        if env_state.agent.moves_remaining <= 0:
            return time_step.termination(features, GameRewards.LOSE)

        if len(agent.history) > 0:
            last_timestep = agent.history[-1]
            rule = self.get_rule_from_timestep(last_timestep)
            # The rewarding_actions can be user specified
            for rewarding_class in self.rewarding_actions:
                if isinstance(rule, rewarding_class):
                    return time_step.transition(
                        features,
                        reward=GameRewards.HELPFUL_MOVE,
                        discount=self.discount,
                    )

        # The agent is penalized for returning to a previous state.
        for key, group in groupby(
            sorted([f"{h.raw}" for h in env_state.agent.history])
        ):
            list_group = list(group)
            list_count = len(list_group)
            if list_count <= 1:
                continue

            return time_step.transition(
                features, reward=GameRewards.PREVIOUS_LOCATION, discount=self.discount
            )
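The groupby pass above detects a repeated state: the raw history strings are sorted so identical expressions become adjacent, and any group with more than one member triggers the PREVIOUS_LOCATION penalty. A smaller, self-contained version of the same check, with made-up history values:

from itertools import groupby

history = ["4x + 2x", "6x", "4x + 2x"]  # hypothetical raw expression strings
revisited = any(
    len(list(group)) > 1  # the same expression appears more than once
    for _, group in groupby(sorted(history))
)
assert revisited is True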

Example from justindujardin/mathy, mathy/math_game.py (view on GitHub):
    def get_win_signal(self, env_state: MathEnvironmentState) -> float:
        """Calculate the reward value for completing the episode. This is done
        so that the reward signal can be scaled based on the time it took to
        complete the episode."""
        total_moves = env_state.max_moves
        # guard against divide by zero with max and a small value
        current_move = round(
            max(3e-10, total_moves - env_state.agent.moves_remaining), 3
        )
        bonus = (total_moves / current_move) / total_moves
        if current_move < total_moves / 2:
            bonus *= 2
        return GameRewards.WIN + bonus
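To make the time scaling concrete, here is a small worked example of the bonus term above; the numbers are hypothetical:

total_moves = 20  # hypothetical move budget for the episode
current_move = 5  # moves actually used before winning
bonus = (total_moves / current_move) / total_moves  # (20 / 5) / 20 = 0.2
if current_move < total_moves / 2:  # won in the first half of the budget
    bonus *= 2  # 0.4
# the episode would then terminate with GameRewards.WIN + 0.4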

Example from justindujardin/mathy, mathy/math_game.py (view on GitHub):
                        features,
                        reward=GameRewards.HELPFUL_MOVE,
                        discount=self.discount,
                    )

        # The agent is penalized for returning to a previous state.
        for key, group in groupby(
            sorted([f"{h.raw}" for h in env_state.agent.history])
        ):
            list_group = list(group)
            list_count = len(list_group)
            if list_count <= 1:
                continue

            return time_step.transition(
                features, reward=GameRewards.PREVIOUS_LOCATION, discount=self.discount
            )

        # We're in a new state, and the agent is a little older.
        return time_step.transition(
            features, reward=GameRewards.TIMESTEP, discount=self.discount
        )

Example from justindujardin/mathy, mathy/math_game.py (view on GitHub):
        if (
            env_state.agent.problem_type == MODE_SIMPLIFY_POLYNOMIAL
            and not has_like_terms(root)
        ):
            term_nodes = get_terms(root)
            is_win = True
            for term in term_nodes:
                if not is_preferred_term_form(term):
                    is_win = False
            if is_win:
                return time_step.termination(features, self.get_win_signal(env_state))

        # Check the turn count last because if the previous move that incremented
        # the turn over the count resulted in a win-condition, we want it to be honored.
        if env_state.agent.moves_remaining <= 0:
            return time_step.termination(features, GameRewards.LOSE)

        if len(agent.history) > 0:
            last_timestep = agent.history[-1]
            rule = self.get_rule_from_timestep(last_timestep)
            # The rewarding_actions can be user specified
            for rewarding_class in self.rewarding_actions:
                if isinstance(rule, rewarding_class):
                    return time_step.transition(
                        features,
                        reward=GameRewards.HELPFUL_MOVE,
                        discount=self.discount,
                    )

        # The agent is penalized for returning to a previous state.
        for key, group in groupby(
            sorted([f"{h.raw}" for h in env_state.agent.history])

Example from justindujardin/mathy, mathy/agents/experience.py (view on GitHub):
    def add_frame(self, frame: ExperienceFrame):
        if frame.terminal and len(self._frames) > 0 and self._frames[-1].terminal:
            # Discard if terminal frame continues
            return

        frame_index = self._top_frame_index + len(self._frames)
        was_full = self.is_full()

        # append frame
        self._frames.append(frame)

        # append index if there are enough (because we replay 4 at a time)
        if frame_index >= 3:
            # UNREAL uses 0 or non-zero, but we have a penalty timestep, so
            # consider anything less than that to be zero.
            if frame.reward <= GameRewards.TIMESTEP:
                self._zero_reward_indices.append(frame_index)
            else:
                self._non_zero_reward_indices.append(frame_index)

        if was_full:
            self._top_frame_index += 1

            cut_frame_index = self._top_frame_index + 3
            # Cut frame if its index is lower than cut_frame_index.
            if (
                len(self._zero_reward_indices) > 0
                and self._zero_reward_indices[0] < cut_frame_index
            ):
                self._zero_reward_indices.popleft()

            if (

Example from justindujardin/mathy, mathy/agents/r2d2/learner.py (view on GitHub):
    def rp_samples(self, max_samples=2) -> Tuple[MathyBatchObservation, List[float]]:
        output: MathyBatchObservation = MathyBatchObservation([], [], [], [[], []])
        rewards: List[float] = []
        if self.experience.is_full() is False:
            return output, rewards
        windows: List[MathyWindowObservation] = []
        for i in range(max_samples):
            frames = self.experience.sample_rp_sequence()
            # 4 frames
            states = [frame.state for frame in frames[:-1]]
            target_reward = frames[-1].reward
            if math.isclose(target_reward, GameRewards.TIMESTEP):
                sample_label = 0  # zero
            elif target_reward > 0:
                sample_label = 1  # positive
            else:
                sample_label = 2  # negative
            windows.append(observations_to_window(states))
            rewards.append(sample_label)
        return windows_to_batch(windows), rewards
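The reward-prediction labels above treat GameRewards.TIMESTEP as the ordinary per-step penalty: a sampled frame whose reward is approximately equal to it is labeled 0 (zero), a positive reward is labeled 1, and anything else is labeled 2 (negative). A standalone sketch of that rule, using a placeholder value in place of the real GameRewards.TIMESTEP constant:

import math

def reward_prediction_label(target_reward: float, timestep_penalty: float = -0.01) -> int:
    # Same three-way classification as rp_samples above; -0.01 is a placeholder,
    # not the library's actual GameRewards.TIMESTEP value.
    if math.isclose(target_reward, timestep_penalty):
        return 0  # ordinary step penalty counts as "zero" reward
    elif target_reward > 0:
        return 1  # positive reward
    return 2  # any other negative reward

assert reward_prediction_label(-0.01) == 0
assert reward_prediction_label(1.2) == 1
assert reward_prediction_label(-1.0) == 2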

Example from justindujardin/mathy, mathy/a3c/experience.py (view on GitHub):
    def add_frame(self, frame: ExperienceFrame):
        if frame.terminal and len(self._frames) > 0 and self._frames[-1].terminal:
            # Discard if terminal frame continues
            return

        frame_index = self._top_frame_index + len(self._frames)
        was_full = self.is_full()

        # append frame
        self._frames.append(frame)

        # append index if there are enough (because we replay 4 at a time)
        if frame_index >= 3:
            # UNREAL uses 0 or non-zero, but we have a penalty timestep, so
            # consider anything less than that to be zero.
            if frame.reward <= GameRewards.TIMESTEP:
                self._zero_reward_indices.append(frame_index)
            else:
                self._non_zero_reward_indices.append(frame_index)

        if was_full:
            self._top_frame_index += 1

            cut_frame_index = self._top_frame_index + 3
            # Cut frame if its index is lower than cut_frame_index.
            if (
                len(self._zero_reward_indices) > 0
                and self._zero_reward_indices[0] < cut_frame_index
            ):
                self._zero_reward_indices.popleft()

            if (