read_sql chokes on mysql when using labels with queries due to unnecessary quoting (original) (raw)
Not sure if this is a pandas bug or an upstream one, but here's an example of the bug (pandas-0.14.1, mariadb 10, sqlalchemy-0.9.4)
engine=create_engine('mysql://{username}:{password}@{host}/{database}?charset=utf8'.format(**db)) pandas.io.sql.read_sql('SELECT onlinetransactions.id FROM onlinetransactions LIMIT 1', engine) #Does what you'd expect pandas.io.sql.read_sql('SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1', engine) #Fails
The error you get back is:
C:\Anaconda\lib\site-packages\pandas\io\sql.pyc in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns) 421 coerce_float=coerce_float, parse_dates=parse_dates) 422 --> 423 if pandas_sql.has_table(sql): 424 pandas_sql.meta.reflect(only=[sql]) 425 return pandas_sql.read_table(
C:\Anaconda\lib\site-packages\pandas\io\sql.pyc in has_table(self, name) 847 848 def has_table(self, name): --> 849 return self.engine.has_table(name) 850 851 def get_table(self, table_name):
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in has_table(self, table_name, schema) 1757 1758 """ -> 1759 return self.run_callable(self.dialect.has_table, table_name, schema) 1760 1761 def raw_connection(self):
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in run_callable(self, callable_, *args, **kwargs) 1661 """ 1662 with self.contextual_connect() as conn: -> 1663 return conn.run_callable(callable_, *args, **kwargs) 1664 1665 def execute(self, statement, *multiparams, **params):
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in run_callable(self, callable_, *args, **kwargs) 1188 1189 """ -> 1190 return callable_(self, *args, **kwargs) 1191 1192 def _run_visitor(self, visitorcallable, element, **kwargs):
C:\Anaconda\lib\site-packages\sqlalchemy\dialects\mysql\base.pyc in has_table(self, connection, table_name, schema) 2274 try: 2275 try: -> 2276 rs = connection.execute(st) 2277 have = rs.fetchone() is not None 2278 rs.close()
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in execute(self, object, *multiparams, **params) 710 """ 711 if isinstance(object, util.string_types[0]): --> 712 return self._execute_text(object, multiparams, params) 713 try: 714 meth = object._execute_on_connection
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_text(self, statement, multiparams, params) 859 statement, 860 parameters, --> 861 statement, parameters 862 ) 863 if self._has_events or self.engine._has_events:
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args) 945 parameters, 946 cursor, --> 947 context) 948 949 if self._has_events or self.engine._has_events:
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context) 1106 self.dialect.dbapi.Error, 1107 connection_invalidated=self._is_disconnect), -> 1108 exc_info 1109 ) 1110
C:\Anaconda\lib\site-packages\sqlalchemy\util\compat.pyc in raise_from_cause(exception, exc_info) 183 exc_info = sys.exc_info() 184 exc_type, exc_value, exc_tb = exc_info --> 185 reraise(type(exception), exception, tb=exc_tb) 186 187 if py3k:
C:\Anaconda\lib\site-packages\sqlalchemy\engine\base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args) 938 statement, 939 parameters, --> 940 context) 941 except Exception as e: 942 self._handle_dbapi_exception(
C:\Anaconda\lib\site-packages\sqlalchemy\engine\default.pyc in do_execute(self, cursor, statement, parameters, context) 433 434 def do_execute(self, cursor, statement, parameters, context=None): --> 435 cursor.execute(statement, parameters) 436 437 def do_execute_no_params(self, cursor, statement, context=None):
C:\Anaconda\lib\site-packages\MySQLdb\cursors.pyc in execute(self, query, args) 203 del tb 204 self.messages.append((exc, value)) --> 205 self.errorhandler(self, exc, value) 206 self._executed = query 207 if not self._defer_warnings: self._warning_check()
C:\Anaconda\lib\site-packages\MySQLdb\connections.pyc in defaulterrorhandler(failed resolving arguments) 34 del cursor 35 del connection ---> 36 raise errorclass, errorvalue 37 38 re_numeric_part = re.compile(r"^(\d+)")
ProgrammingError: (ProgrammingError) (1103, "Incorrect table name 'SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1'") 'DESCRIBE SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1' ()
So it never gets as far as running the actual query, because it's tried to run a DESCRIBE query with ``` quotes which fails. i.e.
MariaDB [transactions]> DESCRIBE `SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1`;
ERROR 1103 (42000): Incorrect table name 'SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1'
But
MariaDB [transactions]> DESCRIBE SELECT onlinetransactions.id as firstid FROM onlinetransactions LIMIT 1;
+------+-------------+--------------------+-------+---------------+--------------------------------+---------+------+----------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+--------------------+-------+---------------+--------------------------------+---------+------+----------+-------------+
| 1 | SIMPLE | onlinetransactions | index | NULL | ix_OnlineTransactions_Rpt_Year | 5 | NULL | 11485535 | Using index |
+------+-------------+--------------------+-------+---------------+--------------------------------+---------+------+----------+-------------+
1 row in set (0.00 sec)
Ok. So looking at the stacktrace, I reckon this is a pandas bug as it seems to be calling has_table on my sql query, which doesn't seem to make any sense?