# coding: utf8
"""
SklearnRegressor Class
======================
Interpolation using regressors from Scikit-Learn.
:Example:
::
>> from batman.surrogate import SklearnRegressor
>> import numpy as np
>> sample = np.array([[2, 4], [3, 5], [6, 9]])
>> data = np.array([[12, 1], [10, 2], [9, 4]])
>> regressor = 'RandomForestRegressor()'
>> predictor = SklearnRegressor(sample, data, regressor)
>> point = (5.0, 8.0)
>> predictor.evaluate(point)
array([9.7, 2.9])
"""
import logging
import warnings
import numpy as np
from ..misc import (NestedPool, cpu_system)
from ..functions.utils import multi_eval
class SklearnRegressor(object):
    """Interface to Scikit-learn regressors.

    One regressor is fitted per column of the observed data; a prediction
    queries every fitted regressor and gathers the results.
    """

    logger = logging.getLogger(__name__)

    def __init__(self, sample, data, regressor):
        r"""Create the predictor.

        Uses sample and data to construct a predictor using sklearn.
        No normalization is performed here: inputs are passed to the
        regressor as given.

        :param array_like sample: Sample used to generate the data
          (n_samples, n_features).
        :param array_like data: Observed data (n_samples, n_outputs).
        :param regressor: Scikit-Learn regressor.
        :type regressor: Either regressor object or
          str(:mod:`sklearn.ensemble`.Regressor)
        :raises AttributeError: if ``regressor`` is a string that cannot be
          turned into an object of :mod:`sklearn.ensemble`.
        """
        # Probe for a nested (2D-like) sample; only then force a
        # (n_samples, -1) array. Anything else is forwarded untouched.
        try:
            sample[0][0]
        except (TypeError, IndexError):
            pass
        else:
            sample = np.array(sample).reshape(len(sample), -1)

        # Accept any array_like (e.g. list of lists), as documented.
        data = np.asarray(data)
        self.model_len = data.shape[1]
        if self.model_len == 1:
            data = data.ravel()

        # Define the CPU multi-threading/processing strategy:
        # never more workers than outputs to fit, and at least one.
        self.n_cpu = max(1, min(cpu_system(), self.model_len))

        try:
            # Regressor is already a sklearn object
            get_params = regressor.get_params
        except AttributeError:
            # Instantiate regressor from str.
            # NOTE(review): this evaluates a caller-supplied expression.
            # Emptying ``__builtins__`` is not a real sandbox; only trusted
            # strings must reach this point.
            try:
                regressor = eval('ske.' + regressor, {'__builtins__': None},
                                 {'ske': __import__('sklearn').ensemble})
            except (TypeError, AttributeError, NameError, SyntaxError):
                raise AttributeError('Regressor unknown from sklearn.')
            get_params = regressor.get_params

        # Call get_params so the actual parameters are logged, not the
        # repr of the bound method.
        self.logger.debug('Regressor info:\n%s', get_params())

        def model_fitting(column):
            """Fit an instance of :class:`sklearn.ensemble`.Regressor."""
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                fitted = regressor.fit(sample, column)
            return fitted

        # Create a predictor per data, parallelize if several data
        if self.model_len > 1:
            pool = NestedPool(self.n_cpu)
            try:
                self.regressor = list(pool.imap(model_fitting, data.T))
            finally:
                # Always release the workers, even if a fit fails.
                pool.terminate()
        else:
            self.regressor = [model_fitting(data)]

    @multi_eval
    def evaluate(self, point):
        """Make a prediction.

        From a point, make a new prediction with every fitted regressor.
        The ``multi_eval`` decorator wraps this method; any handling of
        several points or reshaping of the result is done there
        (NOTE(review): confirm against ``multi_eval``).

        :param array_like point: The point to evaluate (n_features,).
        :return: The predictions, one entry per fitted regressor.
        :rtype: array_like
        """
        # sklearn predictors expect a 2D input: a single row here.
        point_array = np.asarray(point).reshape(1, -1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # Compute a prediction per predictor
            prediction = [reg.predict(point_array) for reg in self.regressor]

        return np.array(prediction)