How to use the simpleai.machine_learning.models.Classifier class in simpleai

To help you get started, we’ve selected a few simpleai examples, based on popular ways it is used in public projects.
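Before the snippets, here is a minimal sketch of the typical workflow: describe your examples by subclassing ClassificationProblem, then hand a dataset and the problem to any Classifier subclass. The toy GreetingProblem and its data are invented for illustration; ClassificationProblem, is_attribute, and NaiveBayes are taken from simpleai's machine-learning API, but double-check the import paths against your installed version.

from simpleai.machine_learning import ClassificationProblem, is_attribute
from simpleai.machine_learning import NaiveBayes

class GreetingProblem(ClassificationProblem):
    """Toy problem (invented for this sketch): examples are plain strings."""

    @is_attribute
    def first_word(self, text):
        return text.split()[0].lower()

    @is_attribute
    def length(self, text):
        return len(text.split())

    def target(self, text):
        # In a real problem the label usually comes with the example;
        # here it is derived just to keep the sketch self-contained.
        return "greeting" if text.lower().startswith("hello") else "other"

dataset = ["hello world", "hello there", "goodbye now", "see you soon"]
classifier = NaiveBayes(dataset, GreetingProblem())
label, certainty = classifier.classify("hello friend")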


From github.com/simpleai-team/simpleai, simpleai/machine_learning/classifiers.py:
ps.append(self.C[class_])
            hypotheses.append((sum(ps), class_))

        if hypotheses:
            logprob, best = max(hypotheses)
            Z = numpy.logaddexp.reduce([p for p, class_ in hypotheses])
            logprob = logprob - Z
        else:  # Something not at all seen in training, return best a priori
            logprob, best = max((p, class_) for class_, p
                                            in self.C.items())
        p = numpy.exp(logprob)
        assert 0.0 <= p <= 1.0
        return best, p
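The classify fragment above works in log space: numpy.logaddexp.reduce computes the log of a sum of exponentials without underflow, so subtracting it from the winning hypothesis' log-probability yields a normalized posterior. A standalone illustration of that idiom (plain numpy, not simpleai code):

import numpy

# Unnormalized log-probabilities for three classes.
log_scores = numpy.array([-3.2, -1.1, -7.5])

best_index = log_scores.argmax()
Z = numpy.logaddexp.reduce(log_scores)      # log of the sum of exp(scores)
posterior = numpy.exp(log_scores[best_index] - Z)

assert 0.0 <= posterior <= 1.0
print(posterior)  # ~0.89: confidence in the winning class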


class KNearestNeighbors(Classifier):
    """
    Classifies objects based on closest training example.
    Uses the k-nearest examples from the training and
    gets the most common classification among these.

    To use this classifier the problem must define a `distance`
    method to messure the distance between two examples.
    """

    def __init__(self, dataset, problem, k=1):
        self.k = k
        super(KNearestNeighbors, self).__init__(dataset, problem)

    def learn(self):
        try:
            next(iter(self.dataset))
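Because KNearestNeighbors needs a distance method on the problem, a hedged sketch of a suitable problem might look like this (the ((x, y), label) example format and PointsProblem are invented for illustration; import paths are assumptions):

from simpleai.machine_learning import ClassificationProblem
from simpleai.machine_learning import KNearestNeighbors

class PointsProblem(ClassificationProblem):
    """Hypothetical problem: each example is a ((x, y), label) pair."""

    def target(self, example):
        return example[1]

    def distance(self, one, other):
        # Euclidean distance between the two feature tuples.
        (x1, y1), (x2, y2) = one[0], other[0]
        return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5

dataset = [((0, 0), "a"), ((0, 1), "a"), ((5, 5), "b"), ((6, 5), "b")]
classifier = KNearestNeighbors(dataset, PointsProblem(), k=3)
label, certainty = classifier.classify(((1, 1), None))  # label unused here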
From github.com/simpleai-team/simpleai, simpleai/machine_learning/models.py:
    @classmethod
    def load(cls, filepath):
        """
        Loads a pickled version of the classifier saved in `filepath`
        """
        with open(filepath, 'rb') as filehandler:
            classifier = pickle.load(filehandler)

        if not isinstance(classifier, Classifier):
            raise ValueError("Pickled object is not a Classifier")

        return classifier
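In practice load pairs with a save method on the same base class; a round-trip sketch, assuming save pickles the instance to the given path, and reusing the classifier from the first sketch above:

from simpleai.machine_learning.models import Classifier

classifier.save("classifier.pickle")   # save() assumed: pickles self to the path
restored = Classifier.load("classifier.pickle")
label, certainty = restored.classify("hello friend")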
From github.com/simpleai-team/simpleai, simpleai/machine_learning/classifiers.py:
* K-Nearest Neighbor: See http://en.wikipedia.org/wiki/K-nearest_neighbor
"""

import numpy
from collections import defaultdict
from simpleai.machine_learning.models import Classifier
from simpleai.machine_learning.metrics import Counter, OnlineInformationGain, \
                                              OnlineLogProbability

try:
    import cPickle as pickle
except ImportError:
    import pickle


class DecisionTreeLearner(Classifier):
    """
    This implementation features an algorithm that *strictly* follows the
    pseudocode given in AIMA.

    It's obviously inefficient in too many ways (perhaps incomplete too), but
    it's intended to be used pedagogically.

    See the other implementations in this same file for some discussion and
    issues solved.

    This algorithm is equivalent to ID3.
    """

    def __init__(self, dataset, problem):
        self.dataset = dataset
        self.problem = problem
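The pedagogical learner is trained the same way as the other classifiers in this file; a brief hedged sketch, reusing the dataset and GreetingProblem from the first sketch, and assuming (as the KNearestNeighbors snippet suggests) that the base constructor triggers learn():

from simpleai.machine_learning.classifiers import DecisionTreeLearner

tree = DecisionTreeLearner(dataset, GreetingProblem())  # training on construction (assumed)
label, certainty = tree.classify("hello friend")        # leaf result: (majority, frequency)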
From github.com/simpleai-team/simpleai, simpleai/machine_learning/classifiers.py:
def set_results_from_counts(self, counts):
        self.counts = counts
        total = sum(counts.values())
        majority = max(counts, key=counts.get)  # Max frequency
        self.result = (majority, counts[majority] / float(total))

    def add_branch(self, value, branch=None):
        assert value not in self.branches
        if branch is None:
            branch = self.__class__()
        self.branches[value] = branch
        branch.parent = self
        return branch
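set_results_from_counts boils a frequency table down to a (majority_label, confidence) pair; the core idiom in isolation:

counts = {"spam": 7, "ham": 3}

total = sum(counts.values())
majority = max(counts, key=counts.get)   # Key with the highest frequency
result = (majority, counts[majority] / float(total))

print(result)  # ('spam', 0.7)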


class DecisionTreeLearner_Queued(Classifier):
    """
    This implementations has a few improvements over the one based on the book:
        -It uses a queue instead of recursion, so the python stack limit is
         never reached.
        -In case an attribute has a value not seen in training the intermediate
         nodes can give a "best so far" classification.
        -Abusive re-iteration of the train examples is avoided by calculating
         at the same time all information gains of a single node split.

         This algorithm is equivalent to ID3.
    """

    def learn(self):
        if not self.attributes:
            self.root = self._single_node_tree()
            return
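The docstring's first improvement, a queue instead of recursion, is a standard pattern; a generic sketch with collections.deque (the node shape and expand rule are illustrative, not simpleai's internals):

from collections import deque

def expand(node):
    """Illustrative split rule: stop growing below depth 2."""
    depth, children = node
    if depth >= 2:
        return []
    return [(depth + 1, []), (depth + 1, [])]

# Breadth-first growth with an explicit queue: even very deep trees
# never touch Python's recursion limit.
root = (0, [])
pending = deque([root])
while pending:
    node = pending.popleft()
    for child in expand(node):
        node[1].append(child)
        pending.append(child)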
From github.com/simpleai-team/simpleai, simpleai/machine_learning/classifiers.py:
    def _importance(self, attribute, examples):
        """
        AIMA implies that importance should be information gain.
        Since AIMA only defines it for binary features this implementation
        was based on the wikipedia article:
        http://en.wikipedia.org/wiki/Information_gain_in_decision_trees
        """
        gain_counter = OnlineInformationGain(attribute, self.target)
        for example in examples:
            gain_counter.add(example)
        return gain_counter.get_gain()

    def classify(self, example):
        node = walk_to_leaf(self.root, example)
        return node.result
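_importance delegates the bookkeeping to OnlineInformationGain; for reference, here is the information gain from the linked Wikipedia article computed directly, with attribute and target as callables that extract a value and a label from an example (standalone code, not simpleai's):

import math
from collections import Counter

def entropy(labels):
    """Shannon entropy, in bits, of a list of labels."""
    total = float(len(labels))
    return -sum((n / total) * math.log(n / total, 2)
                for n in Counter(labels).values())

def information_gain(examples, attribute, target):
    """H(target) minus the weighted entropy after splitting on attribute."""
    groups = {}
    for example in examples:
        groups.setdefault(attribute(example), []).append(target(example))
    total = float(len(examples))
    remainder = sum((len(g) / total) * entropy(g) for g in groups.values())
    return entropy([target(e) for e in examples]) - remainder

examples = [("sunny", "yes"), ("sunny", "no"), ("rainy", "no"), ("rainy", "no")]
print(information_gain(examples,
                       attribute=lambda e: e[0],
                       target=lambda e: e[1]))  # ~0.31 bits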


class NaiveBayes(Classifier):
    """
    Implements a classifier that uses Bayes' theorem.
    """

    def learn(self):
        # Frequency count of target classes
        self.C = OnlineLogProbability()
        # Frequency count of P(Fi|C):
        self.Fi = defaultdict(lambda:  # For each class,
                      defaultdict(lambda:  # For each attribute,
                          OnlineLogProbability()))  # For each value, count it

        for example in self.dataset:
            class_ = self.target(example)
            self.C.add(class_)
            for attribute in self.attributes:
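The nested defaultdict above keeps one frequency table per class and attribute. The same counting pattern with plain standard-library containers, stripped of simpleai's log-probability machinery:

from collections import defaultdict, Counter

# counts[class_][attribute_name] -> Counter of observed values
counts = defaultdict(lambda: defaultdict(Counter))

examples = [({"color": "red", "size": "big"}, "apple"),
            ({"color": "red", "size": "small"}, "cherry")]

for features, class_ in examples:
    for name, value in features.items():
        counts[class_][name][value] += 1

print(counts["apple"]["color"]["red"])  # 1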