Pandas gives 600-line-long ValueError while running sklearn GridSearchCV · Issue #9928 · pandas-dev/pandas (original) (raw)
I am using sklearn's GridSearchCV to classify some data. When I run the code below with n_jobs=-1, pandas raises a 600-line-long error that ends in "ValueError: buffer source array is read-only". If I set n_jobs=1, it runs fine without any error. Could you please take a look? Thanks.
rfc = RandomForestClassifier(random_state=1)
tuned_parameters = [{'max_features': ['sqrt', 'log2'],
'n_estimators': [20, 100, 200, 500, 1000]}]
clf = GridSearchCV(rfc, tuned_parameters, scoring='roc_auc', cv=3, n_jobs=-1, verbose=2)
clf.fit(X, y)
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] max_features=sqrt, n_estimators=20 ..............................
[CV] max_features=sqrt, n_estimators=20 ..............................
[CV] max_features=sqrt, n_estimators=20 ..............................
[CV] max_features=sqrt, n_estimators=100 .............................
An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (101150, 0))
An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (101150, 0))
An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (101150, 0))
An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (101150, 0))
[CV] max_features=sqrt, n_estimators=200 .............................[CV] max_features=sqrt, n_estimators=100 .............................[CV] max_features=sqrt, n_estimators=100 .............................[CV] max_features=sqrt, n_estimators=200 .............................
---------------------------------------------------------------------------
JoblibValueError Traceback (most recent call last)
<ipython-input-25-99a52d0f19db> in <module>()
4
5 clf = GridSearchCV(rfc, tuned_parameters, scoring='roc_auc', cv=3, n_jobs=-1, verbose=2)
----> 6 clf.fit(X, y)
/home/user/anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in fit(self, X, y)
730
731 """
--> 732 return self._fit(X, y, ParameterGrid(self.param_grid))
733
734
/home/user/anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in _fit(self, X, y, parameter_iterable)
503 self.fit_params, return_parameters=True,
504 error_score=self.error_score)
--> 505 for parameters in parameter_iterable
506 for train, test in cv)
507
/home/user/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
664 # consumption.
665 self._iterating = False
--> 666 self.retrieve()
667 # Make sure that we get a last message telling us we are done
668 elapsed_time = time.time() - self._start_time
/home/user/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in retrieve(self)
547 # Convert this to a JoblibException
548 exception_type = _mk_exception(exception.etype)[0]
--> 549 raise exception_type(report)
550 raise exception
551 finally:
JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/user/anaconda/lib/python2.7/runpy.py in _run_module_as_main(mod_name='IPython.kernel.__main__', alter_argv=1)
157 pkg_name = mod_name.rpartition('.')[0]
158 main_globals = sys.modules["__main__"].__dict__
159 if alter_argv:
160 sys.argv[0] = fname
161 return _run_code(code, main_globals, None,
--> 162 "__main__", fname, loader, pkg_name)
fname = '/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py'
loader = <pkgutil.ImpLoader instance>
pkg_name = 'IPython.kernel'
163
164 def run_module(mod_name, init_globals=None,
165 run_name=None, alter_sys=False):
166 """Execute a module's code without importing it
...........................................................................
/home/user/anaconda/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f8c57a1c630, file "/...ite-packages/IPython/kernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'IPython.kernel', 'app': <module 'IPython.kernel.zmq.kernelapp' from '/ho.../site-packages/IPython/kernel/zmq/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='IPython.kernel')
67 run_globals.update(init_globals)
68 run_globals.update(__name__ = mod_name,
69 __file__ = mod_fname,
70 __loader__ = mod_loader,
71 __package__ = pkg_name)
---> 72 exec code in run_globals
code = <code object <module> at 0x7f8c57a1c630, file "/...ite-packages/IPython/kernel/__main__.py", line 1>
run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'IPython.kernel', 'app': <module 'IPython.kernel.zmq.kernelapp' from '/ho.../site-packages/IPython/kernel/zmq/kernelapp.pyc'>}
73 return run_globals
74
75 def _run_module_code(code, init_globals=None,
76 mod_name=None, mod_fname=None,
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py in <module>()
1
2
----> 3
4 if __name__ == '__main__':
5 from IPython.kernel.zmq import kernelapp as app
6 app.launch_new_instance()
7
8
9
10
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/config/application.py in launch_instance(cls=<class 'IPython.kernel.zmq.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
569
570 If a global instance already exists, this reinitializes and starts it
571 """
572 app = cls.instance(**kwargs)
573 app.initialize(argv)
--> 574 app.start()
app.start = <bound method IPKernelApp.start of <IPython.kernel.zmq.kernelapp.IPKernelApp object>>
575
576 #-----------------------------------------------------------------------------
577 # utility functions, for convenience
578 #-----------------------------------------------------------------------------
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/kernelapp.py in start(self=<IPython.kernel.zmq.kernelapp.IPKernelApp object>)
368 def start(self):
369 if self.poller is not None:
370 self.poller.start()
371 self.kernel.start()
372 try:
--> 373 ioloop.IOLoop.instance().start()
374 except KeyboardInterrupt:
375 pass
376
377 launch_new_instance = IPKernelApp.launch_instance
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
146 PollIOLoop.configure(ZMQIOLoop)
147 return PollIOLoop.instance()
148
149 def start(self):
150 try:
--> 151 super(ZMQIOLoop, self).start()
self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
152 except ZMQError as e:
153 if e.errno == ETERM:
154 # quietly return on ETERM
155 pass
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
835 self._events.update(event_pairs)
836 while self._events:
837 fd, events = self._events.popitem()
838 try:
839 fd_obj, handler_func = self._handlers[fd]
--> 840 handler_func(fd_obj, events)
handler_func = <function null_wrapper>
fd_obj = <zmq.sugar.socket.Socket object>
events = 5
841 except (OSError, IOError) as e:
842 if errno_from_exception(e) == errno.EPIPE:
843 # Happens when the client closes the connection
844 pass
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
270 # Fast path when there are no active contexts.
271 def null_wrapper(*args, **kwargs):
272 try:
273 current_state = _state.contexts
274 _state.contexts = cap_contexts[0]
--> 275 return fn(*args, **kwargs)
args = (<zmq.sugar.socket.Socket object>, 5)
kwargs = {}
276 finally:
277 _state.contexts = current_state
278 null_wrapper._wrapped = True
279 return null_wrapper
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
428 # dispatch events:
429 if events & IOLoop.ERROR:
430 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
431 return
432 if events & IOLoop.READ:
--> 433 self._handle_recv()
self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
434 if not self.socket:
435 return
436 if events & IOLoop.WRITE:
437 self._handle_send()
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
460 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
461 else:
462 if self._recv_callback:
463 callback = self._recv_callback
464 # self._recv_callback = None
--> 465 self._run_callback(callback, msg)
self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
callback = <function null_wrapper>
msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
466
467 # self.update_state()
468
469
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
402 close our socket."""
403 try:
404 # Use a NullContext to ensure that all StackContexts are run
405 # inside our blanket exception handler rather than outside.
406 with stack_context.NullContext():
--> 407 callback(*args, **kwargs)
callback = <function null_wrapper>
args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
kwargs = {}
408 except:
409 gen_log.error("Uncaught exception, closing connection.",
410 exc_info=True)
411 # Close the socket on an uncaught exception from a user callback
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
270 # Fast path when there are no active contexts.
271 def null_wrapper(*args, **kwargs):
272 try:
273 current_state = _state.contexts
274 _state.contexts = cap_contexts[0]
--> 275 return fn(*args, **kwargs)
args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
kwargs = {}
276 finally:
277 _state.contexts = current_state
278 null_wrapper._wrapped = True
279 return null_wrapper
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
247 if self.control_stream:
248 self.control_stream.on_recv(self.dispatch_control, copy=False)
249
250 def make_dispatcher(stream):
251 def dispatcher(msg):
--> 252 return self.dispatch_shell(stream, msg)
msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
253 return dispatcher
254
255 for s in self.shell_streams:
256 s.on_recv(make_dispatcher(s), copy=False)
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/kernelbase.py in dispatch_shell(self=<IPython.kernel.zmq.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', u'msg_type': u'execute_request', u'session': u'D2F6FC266E4F453AB36721F7AF6CAD31', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', 'msg_type': u'execute_request', 'parent_header': {}})
208 else:
209 # ensure default_int_handler during handler call
210 sig = signal(SIGINT, default_int_handler)
211 self.log.debug("%s: %s", msg_type, msg)
212 try:
--> 213 handler(stream, idents, msg)
handler = <bound method IPythonKernel.execute_request of <IPython.kernel.zmq.ipkernel.IPythonKernel object>>
stream = <zmq.eventloop.zmqstream.ZMQStream object>
idents = ['D2F6FC266E4F453AB36721F7AF6CAD31']
msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', u'msg_type': u'execute_request', u'session': u'D2F6FC266E4F453AB36721F7AF6CAD31', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', 'msg_type': u'execute_request', 'parent_header': {}}
214 except Exception:
215 self.log.error("Exception in message handler:", exc_info=True)
216 finally:
217 signal(SIGINT, sig)
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/kernelbase.py in execute_request(self=<IPython.kernel.zmq.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['D2F6FC266E4F453AB36721F7AF6CAD31'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', u'msg_type': u'execute_request', u'session': u'D2F6FC266E4F453AB36721F7AF6CAD31', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'122E08A036D8493A821D16CB2F4EE32A', 'msg_type': u'execute_request', 'parent_header': {}})
357 if not silent:
358 self.execution_count += 1
359 self._publish_execute_input(code, parent, self.execution_count)
360
361 reply_content = self.do_execute(code, silent, store_history,
--> 362 user_expressions, allow_stdin)
user_expressions = {}
allow_stdin = True
363
364 # Flush output before sending the reply.
365 sys.stdout.flush()
366 sys.stderr.flush()
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/kernel/zmq/ipkernel.py in do_execute(self=<IPython.kernel.zmq.ipkernel.IPythonKernel object>, code=u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
176
177 reply_content = {}
178 # FIXME: the shell calls the exception handler itself.
179 shell._reply_content = None
180 try:
--> 181 shell.run_cell(code, store_history=store_history, silent=silent)
shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <I....kernel.zmq.zmqshell.ZMQInteractiveShell object>>
code = u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)"
store_history = True
silent = False
182 except:
183 status = u'error'
184 # FIXME: this code right now isn't being used yet by default,
185 # because the run_cell() call above directly fires off exception
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<IPython.kernel.zmq.zmqshell.ZMQInteractiveShell object>, raw_cell=u"rfc = RandomForestClassifier(random_state=1)\n...auc', cv=3, n_jobs=-1, verbose=2)\nclf.fit(X, y)", store_history=True, silent=False, shell_futures=True)
2866 self.displayhook.exec_result = result
2867
2868 # Execute the user code
2869 interactivity = "none" if silent else self.ast_node_interactivity
2870 self.run_ast_nodes(code_ast.body, cell_name,
-> 2871 interactivity=interactivity, compiler=compiler, result=result)
interactivity = 'last_expr'
compiler = <IPython.core.compilerop.CachingCompiler instance>
2872
2873 # Reset this so later displayed values do not modify the
2874 # ExecutionResult
2875 self.displayhook.exec_result = None
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<IPython.kernel.zmq.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-25-99a52d0f19db>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
2976 return True
2977
2978 for i, node in enumerate(to_run_interactive):
2979 mod = ast.Interactive([node])
2980 code = compiler(mod, cell_name, "single")
-> 2981 if self.run_code(code, result):
self.run_code = <bound method ZMQInteractiveShell.run_code of <I....kernel.zmq.zmqshell.ZMQInteractiveShell object>>
code = <code object <module> at 0x7f8c215a43b0, file "<ipython-input-25-99a52d0f19db>", line 6>
result = <IPython.core.interactiveshell.ExecutionResult object>
2982 return True
2983
2984 # Flush softspace
2985 if softspace(sys.stdout, 0):
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<IPython.kernel.zmq.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f8c215a43b0, file "<ipython-input-25-99a52d0f19db>", line 6>, result=<IPython.core.interactiveshell.ExecutionResult object>)
3030 outflag = 1 # happens in more places, so it's easier as default
3031 try:
3032 try:
3033 self.hooks.pre_run_code_hook()
3034 #rprint('Running code', repr(code_obj)) # dbg
-> 3035 exec(code_obj, self.user_global_ns, self.user_ns)
code_obj = <code object <module> at 0x7f8c215a43b0, file "<ipython-input-25-99a52d0f19db>", line 6>
self.user_global_ns = {'ALLOW_THREADS': 1, 'Annotation': <class 'matplotlib.text.Annotation'>, 'Arrow': <class 'matplotlib.patches.Arrow'>, 'Artist': <class 'matplotlib.artist.Artist'>, 'AutoLocator': <class 'matplotlib.ticker.AutoLocator'>, 'Axes': <class 'matplotlib.axes._axes.Axes'>, 'BUFSIZE': 8192, 'Button': <class 'matplotlib.widgets.Button'>, 'CLIP': 0, 'Circle': <class 'matplotlib.patches.Circle'>, ...}
self.user_ns = {'ALLOW_THREADS': 1, 'Annotation': <class 'matplotlib.text.Annotation'>, 'Arrow': <class 'matplotlib.patches.Arrow'>, 'Artist': <class 'matplotlib.artist.Artist'>, 'AutoLocator': <class 'matplotlib.ticker.AutoLocator'>, 'Axes': <class 'matplotlib.axes._axes.Axes'>, 'BUFSIZE': 8192, 'Button': <class 'matplotlib.widgets.Button'>, 'CLIP': 0, 'Circle': <class 'matplotlib.patches.Circle'>, ...}
3036 finally:
3037 # Reset our crash handler in place
3038 sys.excepthook = old_excepthook
3039 except SystemExit as e:
...........................................................................
/home/user/data-analysis/GA/project/final/<ipython-input-25-99a52d0f19db> in <module>()
1 rfc = RandomForestClassifier(random_state=1)
2 tuned_parameters = [{'max_features': ['sqrt', 'log2'],
3 'n_estimators': [20, 100, 200, 500, 1000]}]
4
5 clf = GridSearchCV(rfc, tuned_parameters, scoring='roc_auc', cv=3, n_jobs=-1, verbose=2)
----> 6 clf.fit(X, y)
7
8
9
10
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=3, error_score='raise',
e...e_func=None,
scoring='roc_auc', verbose=2), X= 1516 Quinoa Weisse 18th Street / Against ...0 0.000000 0
[20000 rows x 5814 columns], y=0 False
0 True
1 False
1 ...lse
9999 True
Name: is_five_star, dtype: bool)
727 y : array-like, shape = [n_samples] or [n_samples, n_output], optional
728 Target relative to X for classification or regression;
729 None for unsupervised learning.
730
731 """
--> 732 return self._fit(X, y, ParameterGrid(self.param_grid))
self._fit = <bound method GridSearchCV._fit of GridSearchCV(..._func=None,
scoring='roc_auc', verbose=2)>
X = 1516 Quinoa Weisse 18th Street / Against ...0 0.000000 0
[20000 rows x 5814 columns]
y = 0 False
0 True
1 False
1 ...lse
9999 True
Name: is_five_star, dtype: bool
self.param_grid = [{'max_features': ['sqrt', 'log2'], 'n_estimators': [20, 100, 200, 500, 1000]}]
733
734
735 class RandomizedSearchCV(BaseSearchCV):
736 """Randomized search on hyper parameters.
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=3, error_score='raise',
e...e_func=None,
scoring='roc_auc', verbose=2), X= 1516 Quinoa Weisse 18th Street / Against ...0 0.000000 0
[20000 rows x 5814 columns], y=0 False
0 True
1 False
1 ...lse
9999 True
Name: is_five_star, dtype: bool, parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
500 )(
501 delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
502 train, test, self.verbose, parameters,
503 self.fit_params, return_parameters=True,
504 error_score=self.error_score)
--> 505 for parameters in parameter_iterable
parameters = undefined
parameter_iterable = <sklearn.grid_search.ParameterGrid object>
506 for train, test in cv)
507
508 # Out is a list of triplet: score, estimator, n_test_samples
509 n_fits = len(out)
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<itertools.islice object>)
661 if pre_dispatch == "all" or n_jobs == 1:
662 # The iterable was consumed all at once by the above for loop.
663 # No need to wait for async callbacks to trigger to
664 # consumption.
665 self._iterating = False
--> 666 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
667 # Make sure that we get a last message telling us we are done
668 elapsed_time = time.time() - self._start_time
669 self._print('Done %3i out of %3i | elapsed: %s finished',
670 (len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError Fri Apr 17 23:31:01 2015
PID: 25128 Python 2.7.9: /home/user/anaconda/bin/python
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=RandomForestClassifier(bootstrap=True, class_wei...lse, random_state=1, verbose=0, warm_start=False), X=<class 'pandas.core.frame.DataFrame'> instance, y=0 False
0 True
1 False
1 ...lse
9999 True
Name: is_five_star, dtype: bool, scorer=make_scorer(roc_auc_score, needs_threshold=True), train=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), test=array([ 0, 1, 2, ..., 6723, 6725, 6729]), verbose=2, parameters={'max_features': 'sqrt', 'n_estimators': 20}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
1447 if parameters is not None:
1448 estimator.set_params(**parameters)
1449
1450 start_time = time.time()
1451
-> 1452 X_train, y_train = _safe_split(estimator, X, y, train)
1453 X_test, y_test = _safe_split(estimator, X, y, test, train)
1454
1455 try:
1456 if y_train is None:
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _safe_split(estimator=RandomForestClassifier(bootstrap=True, class_wei...lse, random_state=1, verbose=0, warm_start=False), X=<class 'pandas.core.frame.DataFrame'> instance, y=0 False
0 True
1 False
1 ...lse
9999 True
Name: is_five_star, dtype: bool, indices=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), train_indices=None)
1514 if train_indices is None:
1515 X_subset = X[np.ix_(indices, indices)]
1516 else:
1517 X_subset = X[np.ix_(indices, train_indices)]
1518 else:
-> 1519 X_subset = safe_indexing(X, indices)
1520
1521 if y is not None:
1522 y_subset = safe_indexing(y, indices)
1523 else:
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/sklearn/utils/__init__.pyc in safe_indexing(X=<class 'pandas.core.frame.DataFrame'> instance, indices=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]))
147 indices : array-like, list
148 Indices according to which X will be subsampled.
149 """
150 if hasattr(X, "iloc"):
151 # Pandas Dataframes and Series
--> 152 return X.iloc[indices]
153 elif hasattr(X, "shape"):
154 if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
155 indices.dtype.kind == 'i'):
156 # This is often substantially faster than X[indices]
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in __getitem__(self=<pandas.core.indexing._iLocIndexer object>, key=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]))
1212
1213 def __getitem__(self, key):
1214 if type(key) is tuple:
1215 return self._getitem_tuple(key)
1216 else:
-> 1217 return self._getitem_axis(key, axis=0)
1218
1219 def _getitem_axis(self, key, axis=0):
1220 raise NotImplementedError()
1221
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in _getitem_axis(self=<pandas.core.indexing._iLocIndexer object>, key=[6634, 6636, 6638, 6639, 6640, 6642, 6644, 6646, 6648, 6650, 6652, 6653, 6654, 6655, 6656, 6658, 6659, 6660, 6662, 6663, ...], axis=0)
1503 "non-integer key")
1504
1505 # validate the location
1506 self._is_valid_integer(key, axis)
1507
-> 1508 return self._get_loc(key, axis=axis)
1509
1510 def _convert_to_indexer(self, obj, axis=0, is_setter=False):
1511 """ much simpler as we only have to deal with our valid types """
1512
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in _get_loc(self=<pandas.core.indexing._iLocIndexer object>, key=[6634, 6636, 6638, 6639, 6640, 6642, 6644, 6646, 6648, 6650, 6652, 6653, 6654, 6655, 6656, 6658, 6659, 6660, 6662, 6663, ...], axis=0)
87 raise IndexingError('no slices here, handle elsewhere')
88
89 return self.obj._xs(label, axis=axis)
90
91 def _get_loc(self, key, axis=0):
---> 92 return self.obj._ixs(key, axis=axis)
93
94 def _slice(self, obj, axis=0, kind=None):
95 return self.obj._slice(obj, axis=axis, kind=kind)
96
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _ixs(self=<class 'pandas.core.frame.DataFrame'> instance, i=[6634, 6636, 6638, 6639, 6640, 6642, 6644, 6646, 6648, 6650, 6652, 6653, 6654, 6655, 6656, 6658, 6659, 6660, 6662, 6663, ...], axis=0)
1709 return self[i]
1710 else:
1711 label = self.index[i]
1712 if isinstance(label, Index):
1713 # a location index by definition
-> 1714 result = self.take(i, axis=axis)
i = [6634, 6636, 6638, 6639, 6640, 6642, 6644, 6646, 6648, 6650, 6652, 6653, 6654, 6655, 6656, 6658, 6659, 6660, 6662, 6663, ...]
values = undefined
values.DataFrame.stack = undefined
axis = 0
axis.Return = undefined
result = undefined
1715 copy=True
1716 else:
1717 new_values = self._data.fast_xs(i)
1718
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in take(self=<class 'pandas.core.frame.DataFrame'> instance, indices=[6634, 6636, 6638, 6639, 6640, 6642, 6644, 6646, 6648, 6650, 6652, 6653, 6654, 6655, 6656, 6658, 6659, 6660, 6662, 6663, ...], axis=0, convert=True, is_copy=True)
1346 taken : type of caller
1347 """
1348
1349 new_data = self._data.take(indices,
1350 axis=self._get_block_manager_axis(axis),
-> 1351 convert=True, verify=True)
1352 result = self._constructor(new_data).__finalize__(self)
1353
1354 # maybe set copy if we didn't actually change the index
1355 if is_copy:
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in take(self=BlockManager
Items: Index([u'1516 Quinoa Weisse'...ock: slice(5239, 5240, 1), 1 x 20000, dtype: bool, indexer=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), axis=1, verify=True, convert=True)
3264 raise Exception('Indices must be nonzero and less than '
3265 'the axis length')
3266
3267 new_labels = self.axes[axis].take(indexer)
3268 return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
-> 3269 axis=axis, allow_dups=True)
3270
3271 def merge(self, other, lsuffix='', rsuffix=''):
3272 if not self._is_indexed_like(other):
3273 raise AssertionError('Must have same axes to merge managers')
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in reindex_indexer(self=BlockManager
Items: Index([u'1516 Quinoa Weisse'...ock: slice(5239, 5240, 1), 1 x 20000, dtype: bool, new_axis=Int64Index([3317, 3318, 3319, 3319, 3320, 3321, ...381, 3382, 3382, 3383, 3383, ...], dtype='int64'), indexer=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), axis=1, fill_value=None, allow_dups=True, copy=True)
3151 indexer, fill_tuple=(fill_value,))
3152 else:
3153 new_blocks = [blk.take_nd(indexer, axis=axis,
3154 fill_tuple=(fill_value if fill_value is not None else
3155 blk.fill_value,))
-> 3156 for blk in self.blocks]
3157
3158 new_axes = list(self.axes)
3159 new_axes[axis] = new_axis
3160 return self.__class__(new_blocks, new_axes)
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in take_nd(self=FloatBlock: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1...6, 97, 98, 99, ...], 5813 x 20000, dtype: float64, indexer=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), axis=1, new_mgr_locs=None, fill_tuple=(nan,))
846 new_values = com.take_nd(self.get_values(), indexer, axis=axis,
847 allow_fill=False)
848 else:
849 fill_value = fill_tuple[0]
850 new_values = com.take_nd(self.get_values(), indexer, axis=axis,
--> 851 allow_fill=True, fill_value=fill_value)
852
853 if new_mgr_locs is None:
854 if axis == 0:
855 slc = lib.indexer_as_slice(indexer)
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/core/common.pyc in take_nd(arr=memmap([[ 0. , 0. , 0. , ... 0. ,
0. , 0. ]]), indexer=array([ 6634, 6636, 6638, ..., 19997, 19998, 19999]), axis=1, out=array([[ 0., 0., 0., ..., 0., 0., 0.],
...0.],
[ 0., 0., 0., ..., 0., 0., 0.]]), fill_value=nan, mask_info=None, allow_fill=True)
818
819 func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype,
820 axis=axis, mask_info=mask_info)
821
822 indexer = _ensure_int64(indexer)
--> 823 func(arr, indexer, out, fill_value)
824
825 if flip_order:
826 out = out.T
827 return out
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/algos.so in pandas.algos.take_2d_axis1_float64_float64 (pandas/algos.c:95466)()
4000
4001
4002
4003
4004
-> 4005
4006
4007
4008
4009
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/algos.so in View.MemoryView.memoryview_cwrapper (pandas/algos.c:175798)()
609
610
611
612
613
--> 614
615
616
617
618
...........................................................................
/home/user/anaconda/lib/python2.7/site-packages/pandas/algos.so in View.MemoryView.memoryview.__cinit__ (pandas/algos.c:172387)()
316
317
318
319
320
--> 321
322
323
324
325
ValueError: buffer source array is read-only
___________________________________________________________________________
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.9.final.0
python-bits: 64
OS: Linux
OS-release: 3.16.0-34-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.16.0
nose: 1.3.6
Cython: 0.22
numpy: 1.9.2
scipy: 0.15.1
statsmodels: 0.6.1
IPython: 3.1.0
sphinx: 1.2.3
patsy: 0.3.0
dateutil: 2.4.2
pytz: 2015.2
bottleneck: None
tables: 3.1.1
numexpr: 2.3.1
matplotlib: 1.4.3
openpyxl: 2.0.2
xlrd: 0.9.3
xlwt: 0.7.5
xlsxwriter: 0.7.2
lxml: 3.4.2
bs4: 4.3.2
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 0.9.9
pymysql: None
psycopg2: 2.6 (dt dec pq3 ext)