How to use the contextualbandits.online.SeparateClassifiers class in contextualbandits

To help you get started, we’ve selected a few contextualbandits examples, based on popular ways it is used in public projects.

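As background for the snippets below, SeparateClassifiers (in contextualbandits.online) fits one copy of a base classifier per arm, each trained only on the rounds where that arm was chosen. The following is a minimal sketch of fitting it on logged data and querying it for new contexts; the synthetic data and the scikit-learn LogisticRegression base learner are illustrative assumptions, not taken from the repository code.

import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.online import SeparateClassifiers

# Illustrative logged data: contexts X, chosen arms a, observed binary rewards r
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 10))
nchoices = 5
a = rng.integers(nchoices, size=1000)
r = rng.integers(2, size=1000)

# One independent classifier per arm, each fit on the rounds where its arm was played
model = SeparateClassifiers(LogisticRegression(), nchoices)
model.fit(X, a, r)

# Recommended arm for each new context
actions = model.predict(X[:5])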

david-cortes/contextualbandits: contextualbandits/evaluation.py (view on GitHub)
    if c is not None:
        assert isinstance(c, float)
    if pmin is not None:
        assert isinstance(pmin, float)
    
    # 'reward_estimator' may be a precomputed array with columns
    # [estimate for the evaluated policy's action, estimate for the logged action],
    # an already-fit object exposing 'predict_proba_separate', or a plain
    # classifier that gets wrapped in SeparateClassifiers and fit to the logged data.
    if type(reward_estimator) == np.ndarray:
        assert reward_estimator.shape[1] == 2
        assert reward_estimator.shape[0] == X.shape[0]
        rhat_new = reward_estimator[:, 0]
        rhat_old = reward_estimator[:, 1]
    elif 'predict_proba_separate' in dir(reward_estimator):
        rhat = reward_estimator.predict_proba_separate(X)
        rhat_new = rhat[np.arange(rhat.shape[0]), pred]
        rhat_old = rhat[np.arange(rhat.shape[0]), a]
    elif 'predict_proba' in dir(reward_estimator):
        reward_estimator = SeparateClassifiers(reward_estimator, nchoices)
        reward_estimator.fit(X, a, r)
        rhat = reward_estimator.predict_proba_separate(X)
        rhat_new = rhat[np.arange(rhat.shape[0]), pred]
        rhat_old = rhat[np.arange(rhat.shape[0]), a]
    else:
        error_msg = "'reward_estimator' must be either an array, a classifier with "
        error_msg += "'predict_proba', or a 'SeparateClassifiers' object."
        raise ValueError(error_msg)
    
    if handle_invalid:
        # Estimates of exactly 0 or 1 are usually artifacts of an overfit classifier;
        # replace them with random draws from skewed Beta distributions.
        rhat_new[rhat_new == 1] = np.random.beta(3, 1, size=rhat_new.shape)[rhat_new == 1]
        rhat_new[rhat_new == 0] = np.random.beta(1, 3, size=rhat_new.shape)[rhat_new == 0]
        rhat_old[rhat_old == 1] = np.random.beta(3, 1, size=rhat_old.shape)[rhat_old == 1]
        rhat_old[rhat_old == 0] = np.random.beta(1, 3, size=rhat_old.shape)[rhat_old == 0]
    
    if c is not None:
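The branches above accept the reward estimator in three forms. If you prefer to precompute the estimates yourself, the first branch expects a two-column array; below is a sketch of building it with SeparateClassifiers, reusing X, a, r, and nchoices from the earlier sketch (the 'pred' array standing in for the evaluated policy's choices is an assumption):

import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.online import SeparateClassifiers

pred = np.random.default_rng(1).integers(nchoices, size=X.shape[0])  # stand-in for the evaluated policy's actions
rows = np.arange(X.shape[0])

est = SeparateClassifiers(LogisticRegression(), nchoices)
est.fit(X, a, r)
rhat = est.predict_proba_separate(X)  # shape (n_samples, nchoices)

# Column 0: estimate for the evaluated policy's action; column 1: estimate for the logged action
reward_estimator = np.c_[rhat[rows, pred], rhat[rows, a]]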

david-cortes/contextualbandits: contextualbandits/offpolicy.py (view on GitHub)
Reward estimates for the actions that were chosen by the policy.
        """
        try:
            from costsensitive import RegressionOneVsRest, WeightedAllPairs
        except ImportError:
            raise ValueError("This functionality requires package 'costsensitive'.\n"
                             "Can be installed with 'pip install costsensitive'.")
        p = _check_1d_inp(p)
        assert p.shape[0] == X.shape[0]
        l = -r    # 'costsensitive' works with losses, so negate the rewards
        
        # Build the cost matrix C: either a precomputed array, or negated reward
        # probability estimates from 'predict_proba_separate' (wrapping a plain
        # classifier in SeparateClassifiers first if needed).
        if type(self.reward_estimator) == np.ndarray:
            C = self.reward_estimator
        elif 'predict_proba_separate' in dir(self.reward_estimator):
            C = -self.reward_estimator.predict_proba_separate(X)
        elif 'predict_proba' in dir(self.reward_estimator):
            reward_estimator = SeparateClassifiers(self.reward_estimator, self.nchoices,
                                                   beta_prior=self.beta_prior, smoothing=self.smoothing)
            reward_estimator.fit(X, a, r)
            C = -reward_estimator.predict_proba_separate(X)
        else:
            raise ValueError("Error: couldn't obtain reward estimates. Are you passing the right input to 'reward_estimator'?")
        
        if self.handle_invalid:
            C[C == 1] = np.random.beta(3, 1, size = C.shape)[C == 1]
            C[C == 0] = np.random.beta(1, 3, size = C.shape)[C == 0]
        
        if self.c is not None:
            p = self.c * p
        if self.pmin is not None:
            p = np.clip(p, a_min = self.pmin, a_max = None)
        
        # Doubly-robust correction: for the action that was actually taken, shift the
        # model-based cost toward the observed loss, inversely weighted by the propensity p.
        C[np.arange(C.shape[0]), a] += (l - C[np.arange(C.shape[0]), a]) / p.reshape(-1)
        if self.method == 'rovr':
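When a plain classifier is passed, the off-policy learner wraps it as shown above, forwarding its own beta_prior and smoothing settings to SeparateClassifiers. Below is a sketch of passing those keyword arguments directly when building the wrapper yourself; the ((a, b), n) prior format and the (a, b) smoothing tuple follow the library's documented conventions, but the specific numbers here are arbitrary:

from sklearn.linear_model import LogisticRegression
from contextualbandits.online import SeparateClassifiers

nchoices = 5

# beta_prior=((a, b), n): arms that have seen too few observations (threshold n)
# get their scores drawn from a Beta(a, b) prior instead of an under-fit classifier.
# smoothing=(a, b): shrinks each arm's predicted probabilities toward a prior,
# more strongly for arms with few observations (see the library docs for the formula).
reward_estimator = SeparateClassifiers(
    LogisticRegression(),
    nchoices,
    beta_prior=((3, 7), 2),
    smoothing=(1, 2),
)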