Sklearn cross-validation raises JoblibValueError

Question

Sklearn cross-validation raises JoblibValueError

I am using sklearn.cross_validation.cross_val_score to evaluate my model. Below is a portion of my code:

""" 5-fold Cross Validation """
print "*** 5-fold Cross Validation"
shuffle = ShuffleSplit(len(y), n_iter=5)
clf = LinearSVC(penalty = 'l2')
print "Doing cross-validation"
cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
                         scoring=precision_recall_fscore_support) 
print numpy.unique(y)
print cv_res

My laptop has 8 cores, so setting n_jobs to 6 should be fine. But after waiting a long time, I get an exception like this:

*** 5-fold Cross Validation
Doing cross-validation
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
Traceback (most recent call last):
  File "/Users/cwang/Documents/workspace/NameSuggestion@Verisign/classification_DMOZ/Baykan2011.py", line 118, in <module>
    scoring=precision_recall_fscore_support) 
  File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.py", line 1151, in cross_val_score
    for train, test in cv)
  File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 660, in __call__
    self.retrieve()
  File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 543, in retrieve
    raise exception_type(report)
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
    ...........................................................................
/Users/cwang/Documents/workspace/NameSuggestion@Verisign/classification_DMOZ/Baykan2011.py in <module>()
    113     print "*** 5-fold Cross Validation"
    114     shuffle = ShuffleSplit(len(y), n_iter=5)
    115     clf = LinearSVC(penalty = 'l2')
    116     print "Doing cross-validation"
    117     cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
--> 118                              scoring=precision_recall_fscore_support) 
    119     print numpy.unique(y)
    120     print cv_res
    121     
    122         

...........................................................................
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.py in cross_val_score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
     random_state=None, tol=0.0001, verbose=0), X=<1060047x5834248 sparse matrix of type '<type 'n... stored elements in Compressed Sparse Row format>, y=array([ 0,  0,  0, ..., 12, 12, 12]), scoring=<function precision_recall_fscore_support>, cv=ShuffleSplit(1060047, n_iter=5, test_size=0.1, random_state=None), n_jobs=6, verbose=2, fit_params=None, score_func=None, pre_dispatch='2*n_jobs')
   1146     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
   1147                         pre_dispatch=pre_dispatch)
   1148     scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
   1149                                               train, test, verbose, None,
   1150                                               fit_params)
-> 1151                       for train, test in cv)
        cv = ShuffleSplit(1060047, n_iter=5, test_size=0.1, random_state=None)
   1152     return np.array(scores)[:, 0]
   1153 
   1154 
   1155 def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters,

...........................................................................
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=6), iterable=<itertools.islice object>)
    655             if pre_dispatch == "all" or n_jobs == 1:
    656                 # The iterable was consumed all at once by the above for loop.
    657                 # No need to wait for async callbacks to trigger to
    658                 # consumption.
    659                 self._iterating = False
--> 660             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=6)>
    661             # Make sure that we get a last message telling us we are done
    662             elapsed_time = time.time() - self._start_time
    663             self._print('Done %3i out of %3i | elapsed: %s finished',
    664                         (len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    ValueError                                         Wed Jun 24 04:10:51 2015
PID: 38884                  Python 2.7.10: /Users/cwang/anaconda/bin/python
...........................................................................
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
     random_state=None, tol=0.0001, verbose=0), X=<1060047x5834248 sparse matrix of type '<type 'n... stored elements in Compressed Sparse Row format>, y=array([ 0,  0,  0, ..., 12, 12, 12]), scorer=<function precision_recall_fscore_support>, train=array([ 957438, 1011254,  296495, ...,  943276,  380023,   86700]), test=array([992319, 113779, 271246, ..., 901889, 607534, 582009]), verbose=2, parameters=None, fit_params={}, return_train_score=False, return_parameters=False)
   1235     X_test, y_test = _safe_split(estimator, X, y, test, train)
   1236     if y_train is None:
   1237         estimator.fit(X_train, **fit_params)
   1238     else:
   1239         estimator.fit(X_train, y_train, **fit_params)
-> 1240     test_score = _score(estimator, X_test, y_test, scorer)
   1241     if return_train_score:
   1242         train_score = _score(estimator, X_train, y_train, scorer)
   1243 
   1244     scoring_time = time.time() - start_time

...........................................................................
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
     random_state=None, tol=0.0001, verbose=0), X_test=<106005x5834248 sparse matrix of type '<type 'nu... stored elements in Compressed Sparse Row format>, y_test=array([12,  0,  1, ..., 11,  7,  7]), scorer=<function precision_recall_fscore_support>)
   1291 def _score(estimator, X_test, y_test, scorer):
   1292     """Compute the score of an estimator on a given test set."""
   1293     if y_test is None:
   1294         score = scorer(estimator, X_test)
   1295     else:
-> 1296         score = scorer(estimator, X_test, y_test)
   1297     if not isinstance(score, numbers.Number):
   1298         raise ValueError("scoring must return a number, got %s (%s) instead."
   1299                          % (str(score), type(score)))
   1300     return score

...........................................................................
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.pyc in precision_recall_fscore_support(y_true=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
     random_state=None, tol=0.0001, verbose=0), y_pred=<106005x5834248 sparse matrix of type '<type 'nu... stored elements in Compressed Sparse Row format>, beta=array([12,  0,  1, ..., 11,  7,  7]), labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', 'f-score'), sample_weight=None)
   1661     """
   1662     average_options = (None, 'micro', 'macro', 'weighted', 'samples')
   1663     if average not in average_options:
   1664         raise ValueError('average has to be one of ' +
   1665                          str(average_options))
-> 1666     if beta <= 0:
   1667         raise ValueError("beta should be >0 in the F-beta score")
   1668 
   1669     y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
   1670 

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
___________________________________________________________________________

Does anyone know how I can fix this?

Thanks.

+3

python scikit-learn

Munichong, June 24, 2015 at 12:44

source to share

1 answer

Andreus · Answer 1 · 2015-07-02T18:41:28+0000

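You are passing a plain metric function where cross_val_score expects a scorer. A scorer is called as scorer(estimator, X_test, y_test), which is why the traceback shows the LinearSVC estimator bound to y_true, the test matrix bound to y_pred, and the label array bound to beta; the check "beta <= 0" on that array is what raises the ValueError. Wrap the metric with make_scorer so that it is called with the true and predicted labels instead.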

cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
                     scoring=precision_recall_fscore_support)

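should be (note that the _score function shown in the traceback also rejects any score that is not a single number, so in practice the wrapped metric must return a scalar, e.g. accuracy_score, rather than the tuple returned by precision_recall_fscore_support)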

cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
                     scoring=sklearn.metrics.make_scorer(precision_recall_fscore_support) )

Sklearn cross-validation raises JoblibValueError

More articles: