Bug when using multi-row inserts with SQLite database · Issue #29921 · pandas-dev/pandas (original) (raw)
Code Sample
import logging, sqlite3 import pandas as pd
from datetime import date, timedelta from random import normalvariate, randint
#%% Storing function. def store_df_data(): """ Create a new SQLite database, generate random data (as DataFrame) and try to save it in the database. """ # Defining logger. logging.basicConfig(format="%(levelname)-5s - %(message)s", level=logging.INFO) logger = logging.getLogger("sqlite_test")
# Create SQLite database.
conn = sqlite3.connect("financial_dataframe.db")
cursor = conn.cursor()
# Create table (only if it doesn't exist).
sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='financial_data'"
cursor.execute(sql)
if cursor.fetchone() is None:
sql = "CREATE TABLE financial_data (id INTEGER PRIMARY KEY, "
sql += "date TEXT NOT NULL, cp REAL NOT NULL, volume REAL NOT NULL)"
cursor.execute(sql)
# Generate data and adjust dataframe.
df = simulate_goog_data(30, date(2019, 10, 1), 1205.1)
# Insert data one row per time.
logger.info("====================================="
+ "=====================================")
logger.info("Inserting data one per row.")
try:
df.to_sql("financial_data", con=conn, if_exists="append", index=False)
logger.info("Data inserted successfully!")
except:
logger.exception("Error while inserting data one per row:")
# Generate data again.
df = simulate_goog_data(30, date(2019, 11, 1), df.iloc[-1]["cp"])
# Insert data with multirow method.
logger.info("====================================="
+ "=====================================")
logger.info("Inserting data with multirow method.")
try:
df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
method="multi")
logger.info("Data inserted successfully!")
except:
logger.exception("Error while inserting data with multirow method:")
# Generate data again.
df = simulate_goog_data(30, date(2019, 12, 1), df.iloc[-1]["cp"])
# Insert data with multirow method and chunksize.
logger.info("====================================="
+ "=====================================")
logger.info("Inserting data with multirow method and chunksize.")
try:
df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
method="multi", chunksize=10)
logger.info("Data inserted successfully!")
except:
logger.exception("Error while inserting data with multirow and chunksize:")
# Close pointers.
cursor.close()
conn.close()
#%% Simulating function. def simulate_goog_data(n_data, initial_date, initial_close): """ Simulate data from GOOG.
:Parameters:
n_data : Max. range of days to generate data.
initial_date : First date for the simulation.
initial_close : First close for the simulation.
"""
# Mean and standard deviation from GOOG return (october 2019 daily data).
mean_goog = 0.002240703
std_dev_goog = 0.011957324
# Minimum and maximum value for GOOG's volume.
min_vol_goog = 840000
max_vol_goog = 2615000
# Initializing lists.
date_list = list()
close_list = list()
volume_list = list()
# First close and date.
current_date = initial_date
current_close = initial_close
# Generate data (only for working days).
for _ in range(n_data):
if current_date.isoweekday() < 6:
date_list.append(current_date)
close_list.append(current_close)
volume_list.append(randint(min_vol_goog, max_vol_goog))
# Update goog's close.
current_close *= (1 + normalvariate(mean_goog, std_dev_goog))
# Move forward one day.
current_date += timedelta(days=1)
# Return dataframe.
return pd.DataFrame({"date": date_list, "cp": close_list, "volume": volume_list})
#%% Executor. if name == "main": store_df_data()
Problem description
The piece of code described above tries to insert simulated financial data in a SQLite database, using method to_sql
from pandas.DataFrame. It tries to make these insertions in three different ways:
- Inserting all data one row at a time. The insertion performs successfully.
- Inserting all data using attribute
method="multi"
to performs a multi-row insert. It crushes. - Inserting all data using attribute
method="multi"
and setting achunksize
. It also crushes.
It seems that it's impossible insert data in a SQLite database using multi-row option, since it throws the error:
TypeError: insert() takes exactly 2 arguments (1 given)
Could you please check this?
Thank you very much in advance!
Output
INFO - ==========================================================================
INFO - Inserting data one per row.
INFO - Data inserted successfully!
INFO - ==========================================================================
INFO - Inserting data with multirow method.
ERROR - Error while inserting data with multirow method:
Traceback (most recent call last):
File "bug_multirow_sqlite.py", line 52, in store_df_data
df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi")
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
dtype=dtype, method=method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
chunksize=chunksize, dtype=dtype, method=method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
table.insert(chunksize, method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
exec_insert(conn, keys, chunk_iter)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)
INFO - ==========================================================================
INFO - Inserting data with multirow method and chunksize.
ERROR - Error while inserting data with multirow method and chunksize:
Traceback (most recent call last):
File "bug_multirow_sqlite.py", line 64, in store_df_data
df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi", chunksize=10)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
dtype=dtype, method=method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
chunksize=chunksize, dtype=dtype, method=method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
table.insert(chunksize, method)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
exec_insert(conn, keys, chunk_iter)
File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)
Code was run in:
- Windows 10 - Python 3.7.3 - pandas 0.24.2
- Red Hat 7.6 - Python 3.6.8 - pandas 0.25.0