Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_estimators(frameworks, model_types, objective_type):
# Collect the names of the estimators to use, as strings appended to `estimators`.
# NOTE(review): this snippet has lost its indentation and is cut off right after
# the lightgbm check, so only the xgboost branch is visible here.
# frameworks: ["xgboost", "lightgbm", "sklearn", "catboost", "tf", "keras", "fastai", "all"]
# model types: ["tree_based", "linear", "deep_learning"]
# objective_type: "classification" selects the *Classifier names; any other
# value selects the *Regressor names.
# Minimum versions per framework; only xgb_min_ver is used in the visible part.
xgb_min_ver = "0.9"
lgb_min_ver = "2.3.1"
sklearn_min_ver = "0.22"
# TODO: add exclude frameworks
estimators = []
for framework in frameworks:
# Any framework name beginning with "xgb" (e.g. "xgboost") takes this branch.
if str.startswith(framework, "xgb"):
try:
# Imported lazily so that a missing xgboost only disables this branch.
import xgboost as xgb
xgb_ver = xgb.__version__
# ge_version is defined outside this snippet; presumably "xgb_ver >= xgb_min_ver" - confirm.
if ge_version(xgb_ver, xgb_min_ver):
# Model types are matched by prefix, so "linear" and "tree_based" can both
# contribute an estimator for the same framework.
if any(item.startswith('linear') for item in model_types):
if objective_type == "classification":
estimators.append("XGBLinearClassifier")
else:
estimators.append("XGBLinearRegressor")
if any(item.startswith('tree') for item in model_types):
if objective_type == "classification":
estimators.append("XGBTreeClassifier")
else:
estimators.append("XGBTreeRegressor")
# NOTE(review): a bare except catches every exception raised in the try body
# (not only a failed import) and silently moves on to the next framework.
except:
# TODO: log that xgboost is not installed in the right version
continue
# Accepts either prefix: "lgb" or "lightgbm". The body of this branch is not visible.
if str.startswith(framework, ("lgb", "lightgbm")):
def get_default_conda_env():
    """
    Build the default Conda environment for MLflow Models.

    The environment pins the currently installed xgboost version as a pip
    dependency and adds no extra conda dependencies or channels.

    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`.
    """
    import xgboost as xgb

    # XGBoost is not yet available via the default conda channels, so we install it via pip
    xgboost_pin = "xgboost=={}".format(xgb.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[xgboost_pin],
        additional_conda_channels=None,
    )
def __init__(self, params):
# Set up the learner: record the xgboost version, derive the model file
# location and assemble the xgboost training parameters.
# NOTE(review): `xgb`, `os`, `storage_path` and `additional` are not defined in
# this method and must come from the enclosing module - confirm. The snippet has
# lost its indentation and is cut off inside the `learner_params` dict.
super(XgbLearner, self).__init__(params)
self.library_version = xgb.__version__
# self.uid is not assigned here; presumably set by the base-class __init__ - confirm.
self.model_file = self.uid + ".xgb.model"
self.model_file_path = os.path.join(storage_path, self.model_file)
# Boosting rounds are read from additional["one_step"] (default 50); the
# params-based lookup is kept only as the trailing comment below.
self.boosting_rounds = additional.get(
"one_step", 50
) # params.get("boosting_rounds", 50)
# Read from additional["max_steps"], defaulting to 3.
self.max_iters = additional.get("max_steps", 3)
# Parameters taken from self.params, with the defaults shown; "objective" and
# "eval_metric" have no default and end up as None when absent.
self.learner_params = {
"booster": self.params.get("booster", "gbtree"),
"objective": self.params.get("objective"),
"eval_metric": self.params.get("eval_metric"),
"eta": self.params.get("eta", 0.01),
"max_depth": self.params.get("max_depth", 1),
"min_child_weight": self.params.get("min_child_weight", 1),
"subsample": self.params.get("subsample", 0.8),
"colsample_bytree": self.params.get("colsample_bytree", 0.8),
# Modules replaced by mock objects in sys.modules; presumably so the documentation
# build can import the package without these installed - confirm.
# NOTE(review): the loop body below has lost its indentation in this snippet.
MOCK_MODULES = ['scipy', 'scipy.sparse', 'sklearn', 'pandas']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock()
# -- General configuration ------------------------------------------------
# General information about the project.
project = u'xgboost'
author = u'%s developers' % project
copyright = u'2019, %s' % author
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'
# Set before `import xgboost` below; presumably read by the package at import time - confirm.
os.environ['XGBOOST_BUILD_DOC'] = '1'
# Version information.
# Both the short version and the full release string use the installed package's version.
import xgboost
version = xgboost.__version__
release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
'matplotlib.sphinxext.plot_directive',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
'sphinx.ext.intersphinx',
'breathe'
]
graphviz_output_format = 'png'
# Options for matplotlib's plot_directive: output formats as (suffix, dpi) pairs,
# and no link to the plot source in the HTML output.
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
plot_html_show_source_link = False
# General information about the project.
project = u'xgboost'
author = u'%s developers' % project
copyright = u'2015, %s' % author
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'
# add markdown parser
# MarkdownParser is defined or imported outside this fragment; it is given the
# GitHub doc root and registered as the parser for '.md' sources.
MarkdownParser.github_doc_root = github_doc_root
source_parsers = {
'.md': MarkdownParser,
}
# Set before `import xgboost` below; presumably read by the package at import time - confirm.
os.environ['XGBOOST_BUILD_DOC'] = '1'
# Version information.
# Both the short version and the full release string use the installed package's version.
import xgboost
version = xgboost.__version__
release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = ['.rst', '.md']
# Replace each module named in MOCK_MODULES with a mock object in sys.modules.
# NOTE(review): MOCK_MODULES is defined outside this fragment, and the loop body
# below has lost its indentation in this snippet.
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock()
# -- General configuration ------------------------------------------------
# General information about the project.
project = u'xgboost'
author = u'%s developers' % project
copyright = u'2019, %s' % author
github_doc_root = 'https://github.com/dmlc/xgboost/tree/master/doc/'
# Set before `import xgboost` below; presumably read by the package at import time - confirm.
os.environ['XGBOOST_BUILD_DOC'] = '1'
# Version information.
# Both the short version and the full release string use the installed package's version.
import xgboost
version = xgboost.__version__
release = xgboost.__version__
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones
extensions = [
'matplotlib.sphinxext.plot_directive',
'sphinx.ext.autodoc',
'sphinx.ext.napoleon',
'sphinx.ext.mathjax',
'sphinx.ext.intersphinx',
'breathe'
]
graphviz_output_format = 'png'
# Options for matplotlib's plot_directive: output formats as (suffix, dpi) pairs,
# and neither the source link nor the format links are shown in the HTML output.
plot_formats = [('svg', 300), ('png', 100), ('hires.png', 300)]
plot_html_show_source_link = False
plot_html_show_formats = False
# NOTE(review): this is the tail of a function body; `xgb_model`, `model_data_path`,
# `model_data_subpath`, `path`, `conda_env` and `mlflow_model` are bound outside this fragment.
# Save an XGBoost model
xgb_model.save_model(model_data_path)
# Resolve the conda environment: None falls back to the default environment, a
# dict is used as given, and anything else is treated as a path to a YAML file.
conda_env_subpath = "conda.yaml"
if conda_env is None:
conda_env = get_default_conda_env()
elif not isinstance(conda_env, dict):
with open(conda_env, "r") as f:
conda_env = yaml.safe_load(f)
# Write the resolved environment into the model directory as conda.yaml.
with open(os.path.join(path, conda_env_subpath), "w") as f:
yaml.safe_dump(conda_env, stream=f, default_flow_style=False)
# Register the pyfunc flavor (loaded through mlflow.xgboost) and the flavor named
# by FLAVOR_NAME, which records the xgboost version used to save the model.
pyfunc.add_to_model(mlflow_model, loader_module="mlflow.xgboost",
data=model_data_subpath, env=conda_env_subpath)
mlflow_model.add_flavor(FLAVOR_NAME, xgb_version=xgb.__version__, data=model_data_subpath)
# Persist the model configuration as the "MLmodel" file in the model directory.
mlflow_model.save(os.path.join(path, "MLmodel"))
"""
Checks that *xgboost* is available.
"""
# NOTE(review): the `def` line of this function is not part of the snippet.
# Returns False when xgboost cannot be imported or when its native library lacks
# XGBoosterDumpModelEx; otherwise returns True.
try:
import xgboost # noqa F401
except ImportError:
return False
from xgboost.core import _LIB
# Bare attribute access used as a probe: it raises AttributeError when the
# loaded native library does not expose this symbol.
try:
_LIB.XGBoosterDumpModelEx
except AttributeError:
# The version is not recent enough even though it is version 0.6.
# You need to install xgboost from github and not from pypi.
return False
from xgboost import __version__
# NOTE(review): LooseVersion is imported outside this fragment; if it comes from
# distutils.version, that module was removed from the standard library in
# Python 3.12 - confirm.
vers = LooseVersion(__version__)
allowed = LooseVersion('0.7')
# A version below 0.7 only triggers a warning; the function still returns True.
if vers < allowed:
warnings.warn('The converter works for xgboost >= 0.7. Earlier versions might not.')
return True
# NOTE(review): this is the middle of a method body with its indentation lost;
# `model`, `data`, `model_output` and `feature_perturbation` are bound outside this fragment.
# check for unsupported combinations of feature_perturbation and model_outputs
if feature_perturbation == "tree_path_dependent":
if model_output != "margin":
raise ValueError("Only margin model_output is supported for feature_perturbation=\"tree_path_dependent\"")
elif data is None:
raise ValueError("A background dataset must be provided unless you are using feature_perturbation=\"tree_path_dependent\"!")
# Any output other than "margin" needs either the model's objective or its
# tree_output to be known; both being None is rejected.
if model_output != "margin":
if self.model.objective is None and self.model.tree_output is None:
raise Exception("Model does not have a known objective or output type! When model_output is " \
"not \"margin\" then we need to know the model's objective or link function.")
# A bug in XGBoost fixed in v0.81 makes XGBClassifier fail to give margin outputs
if safe_isinstance(model, "xgboost.sklearn.XGBClassifier") and model_output != "margin":
import xgboost
if LooseVersion(xgboost.__version__) < LooseVersion('0.81'):
raise RuntimeError("A bug in XGBoost fixed in v0.81 makes XGBClassifier fail to give margin outputs! Please upgrade to XGBoost >= v0.81!")
# compute the expected value if we have a parsed tree for the cext
# For "logloss" the attribute is bound to self.__dynamic_expected_value itself
# (assigned, not called here) instead of a precomputed value.
if self.model_output == "logloss":
self.expected_value = self.__dynamic_expected_value
# With background data: mean over axis 0 of the model's predictions on self.data.
elif data is not None:
try:
self.expected_value = self.model.predict(self.data, output=model_output).mean(0)
# NOTE(review): a bare except turns every failure of the prediction above,
# whatever its cause, into the categorical-splits message below.
except:
raise Exception("Currently TreeExplainer can only handle models with categorical splits when " \
"feature_perturbation=\"tree_path_dependent\" and no background data is passed. Please try again using " \
"shap.TreeExplainer(model, feature_perturbation=\"tree_path_dependent\").")
# A length-1 result is unwrapped to its single element.
if hasattr(self.expected_value, '__len__') and len(self.expected_value) == 1:
self.expected_value = self.expected_value[0]
# Otherwise, if the model exposes node_sample_weight, use the sum over axis 0 of
# column 0 of self.model.values (presumably the root-node values - confirm).
elif hasattr(self.model, "node_sample_weight"):
self.expected_value = self.model.values[:,0].sum(0)