Sklearn cross-validation raises JoblibValueError
I am using sklearn.cross_validation.cross_val_score to evaluate my model. Below is a portion of my code:
""" 5-fold Cross Validation """
print "*** 5-fold Cross Validation"
shuffle = ShuffleSplit(len(y), n_iter=5)
clf = LinearSVC(penalty = 'l2')
print "Doing cross-validation"
cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
                         scoring=precision_recall_fscore_support)
print numpy.unique(y)
print cv_res
My laptop has 8 cores, so setting n_jobs to 6 should be fine. But after waiting a long time, I get an exception like this:
*** 5-fold Cross Validation
Doing cross-validation
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
[CV] no parameters to be set .........................................
Traceback (most recent call last):
File "/Users/cwang/Documents/workspace/NameSuggestion@Verisign/classification_DMOZ/", line 118, in <module>
File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/", line 1151, in cross_val_score
for train, test in cv)
File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/", line 660, in __call__
File "/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/", line 543, in retrieve
raise exception_type(report)
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
Multiprocessing exception:
/Users/cwang/Documents/workspace/NameSuggestion@Verisign/classification_DMOZ/ in <module>()
113 print "*** 5-fold Cross Validation"
114 shuffle = ShuffleSplit(len(y), n_iter=5)
115 clf = LinearSVC(penalty = 'l2')
116 print "Doing cross-validation"
117 cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6,
--> 118 scoring=precision_recall_fscore_support)
119 print numpy.unique(y)
120 print cv_res
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/ in cross_val_score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
random_state=None, tol=0.0001, verbose=0), X=<1060047x5834248 sparse matrix of type '<type 'n... stored elements in Compressed Sparse Row format>, y=array([ 0, 0, 0, ..., 12, 12, 12]), scoring=<function precision_recall_fscore_support>, cv=ShuffleSplit(1060047, n_iter=5, test_size=0.1, random_state=None), n_jobs=6, verbose=2, fit_params=None, score_func=None, pre_dispatch='2*n_jobs')
1146 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
1147 pre_dispatch=pre_dispatch)
1148 scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
1149 train, test, verbose, None,
1150 fit_params)
-> 1151 for train, test in cv)
cv = ShuffleSplit(1060047, n_iter=5, test_size=0.1, random_state=None)
1152 return np.array(scores)[:, 0]
1155 def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters,
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/ in __call__(self=Parallel(n_jobs=6), iterable=<itertools.islice object>)
655 if pre_dispatch == "all" or n_jobs == 1:
656 # The iterable was consumed all at once by the above for loop.
657 # No need to wait for async callbacks to trigger to
658 # consumption.
659 self._iterating = False
--> 660 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=6)>
661 # Make sure that we get a last message telling us we are done
662 elapsed_time = time.time() - self._start_time
663 self._print('Done %3i out of %3i | elapsed: %s finished',
664 (len(self._output),
Sub-process traceback:
ValueError Wed Jun 24 04:10:51 2015
PID: 38884 Python 2.7.10: /Users/cwang/anaconda/bin/python
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
random_state=None, tol=0.0001, verbose=0), X=<1060047x5834248 sparse matrix of type '<type 'n... stored elements in Compressed Sparse Row format>, y=array([ 0, 0, 0, ..., 12, 12, 12]), scorer=<function precision_recall_fscore_support>, train=array([ 957438, 1011254, 296495, ..., 943276, 380023, 86700]), test=array([992319, 113779, 271246, ..., 901889, 607534, 582009]), verbose=2, parameters=None, fit_params={}, return_train_score=False, return_parameters=False)
1235 X_test, y_test = _safe_split(estimator, X, y, test, train)
1236 if y_train is None:
1237 estimator.fit(X_train, **fit_params)
1238 else:
1239 estimator.fit(X_train, y_train, **fit_params)
-> 1240 test_score = _score(estimator, X_test, y_test, scorer)
1241 if return_train_score:
1242 train_score = _score(estimator, X_train, y_train, scorer)
1244 scoring_time = time.time() - start_time
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
random_state=None, tol=0.0001, verbose=0), X_test=<106005x5834248 sparse matrix of type '<type 'nu... stored elements in Compressed Sparse Row format>, y_test=array([12, 0, 1, ..., 11, 7, 7]), scorer=<function precision_recall_fscore_support>)
1291 def _score(estimator, X_test, y_test, scorer):
1292 """Compute the score of an estimator on a given test set."""
1293 if y_test is None:
1294 score = scorer(estimator, X_test)
1295 else:
-> 1296 score = scorer(estimator, X_test, y_test)
1297 if not isinstance(score, numbers.Number):
1298 raise ValueError("scoring must return a number, got %s (%s) instead."
1299 % (str(score), type(score)))
1300 return score
/Users/cwang/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.pyc in precision_recall_fscore_support(y_true=LinearSVC(C=1.0, class_weight=None, dual=True, f...',
random_state=None, tol=0.0001, verbose=0), y_pred=<106005x5834248 sparse matrix of type '<type 'nu... stored elements in Compressed Sparse Row format>, beta=array([12, 0, 1, ..., 11, 7, 7]), labels=None, pos_label=1, average=None, warn_for=('precision', 'recall', 'f-score'), sample_weight=None)
1661 """
1662 average_options = (None, 'micro', 'macro', 'weighted', 'samples')
1663 if average not in average_options:
1664 raise ValueError('average has to be one of ' +
1665 str(average_options))
-> 1666 if beta <= 0:
1667 raise ValueError("beta should be >0 in the F-beta score")
1669 y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Does anyone know how I can fix this?
source to share
1 answer
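You are passing a metric function where a scorer is expected. cross_val_score calls the scorer as scorer(estimator, X_test, y_test), so when you pass precision_recall_fscore_support directly, the estimator is bound to y_true, X_test to y_pred, and y_test to beta. The check `if beta <= 0` then compares an array with 0, which raises "The truth value of an array with more than one element is ambiguous".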
cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6, scoring=precision_recall_fscore_support)
should be
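cv_res = cross_val_score(clf, X, y, cv=shuffle, verbose=2, n_jobs = 6, scoring=sklearn.metrics.make_scorer(precision_recall_fscore_support) )
Note that cross_val_score requires the scorer to return a single number (see the "scoring must return a number" check in _score in the traceback), whereas precision_recall_fscore_support returns a tuple of (precision, recall, fscore, support). If wrapping it with make_scorer still fails with that error, wrap a single-valued metric instead, e.g. make_scorer(f1_score) with an appropriate average setting for multiclass targets.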
source to share