Bug when using multi-row inserts with SQLite database · Issue #29921 · pandas-dev/pandas (original) (raw)

Code Sample

import logging, sqlite3 import pandas as pd

from datetime import date, timedelta from random import normalvariate, randint

#%% Storing function. def store_df_data(): """ Create a new SQLite database, generate random data (as DataFrame) and try to save it in the database. """ # Defining logger. logging.basicConfig(format="%(levelname)-5s - %(message)s", level=logging.INFO) logger = logging.getLogger("sqlite_test")

# Create SQLite database.
conn = sqlite3.connect("financial_dataframe.db")
cursor = conn.cursor()

# Create table (only if it doesn't exist).
sql = "SELECT name FROM sqlite_master WHERE type='table' AND name='financial_data'"
cursor.execute(sql)

if cursor.fetchone() is None:
    sql = "CREATE TABLE financial_data (id INTEGER PRIMARY KEY, "
    sql += "date TEXT NOT NULL, cp REAL NOT NULL, volume REAL NOT NULL)"
    cursor.execute(sql)

# Generate data and adjust dataframe.
df = simulate_goog_data(30, date(2019, 10, 1), 1205.1)

# Insert data one row per time.
logger.info("====================================="
          + "=====================================")
logger.info("Inserting data one per row.")

try:
    df.to_sql("financial_data", con=conn, if_exists="append", index=False)
    logger.info("Data inserted successfully!")
except:
    logger.exception("Error while inserting data one per row:")

# Generate data again.
df = simulate_goog_data(30, date(2019, 11, 1), df.iloc[-1]["cp"])

# Insert data with multirow method.
logger.info("====================================="
          + "=====================================")
logger.info("Inserting data with multirow method.")

try:
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
              method="multi")
    logger.info("Data inserted successfully!")
except:
    logger.exception("Error while inserting data with multirow method:")

# Generate data again.
df = simulate_goog_data(30, date(2019, 12, 1), df.iloc[-1]["cp"])

# Insert data with multirow method and chunksize.
logger.info("====================================="
          + "=====================================")
logger.info("Inserting data with multirow method and chunksize.")

try:
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, \
              method="multi", chunksize=10)
    logger.info("Data inserted successfully!")
except:
    logger.exception("Error while inserting data with multirow and chunksize:")

# Close pointers.
cursor.close()
conn.close()

#%% Simulating function. def simulate_goog_data(n_data, initial_date, initial_close): """ Simulate data from GOOG.

:Parameters:
    n_data : Max. range of days to generate data.
    initial_date : First date for the simulation.
    initial_close : First close for the simulation.
"""
# Mean and standard deviation from GOOG return (october 2019 daily data).
mean_goog = 0.002240703
std_dev_goog = 0.011957324

# Minimum and maximum value for GOOG's volume.
min_vol_goog = 840000
max_vol_goog = 2615000

# Initializing lists.
date_list = list()
close_list = list()
volume_list = list()

# First close and date.
current_date = initial_date
current_close = initial_close

# Generate data (only for working days).
for _ in range(n_data):
    if current_date.isoweekday() < 6:
        date_list.append(current_date)
        close_list.append(current_close)
        volume_list.append(randint(min_vol_goog, max_vol_goog))

        # Update goog's close.
        current_close *= (1 + normalvariate(mean_goog, std_dev_goog))

    # Move forward one day.
    current_date += timedelta(days=1)

# Return dataframe.
return pd.DataFrame({"date": date_list, "cp": close_list, "volume": volume_list})

#%% Executor. if name == "main": store_df_data()

Problem description

The piece of code described above tries to insert simulated financial data in a SQLite database, using method to_sql from pandas.DataFrame. It tries to make these insertions in three different ways:

  1. Inserting all data one row at a time. The insertion performs successfully.
  2. Inserting all data using attribute method="multi" to performs a multi-row insert. It crushes.
  3. Inserting all data using attribute method="multi" and setting a chunksize. It also crushes.

It seems that it's impossible insert data in a SQLite database using multi-row option, since it throws the error:

TypeError: insert() takes exactly 2 arguments (1 given)

Could you please check this?

Thank you very much in advance!

Output

INFO  - ==========================================================================
INFO  - Inserting data one per row.
INFO  - Data inserted successfully!
INFO  - ==========================================================================
INFO  - Inserting data with multirow method.
ERROR - Error while inserting data with multirow method:
Traceback (most recent call last):
  File "bug_multirow_sqlite.py", line 52, in store_df_data
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi")
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
    dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
    chunksize=chunksize, dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
    table.insert(chunksize, method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
    exec_insert(conn, keys, chunk_iter)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
    conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)
INFO  - ==========================================================================
INFO  - Inserting data with multirow method and chunksize.
ERROR - Error while inserting data with multirow method and chunksize:
Traceback (most recent call last):
  File "bug_multirow_sqlite.py", line 64, in store_df_data
    df.to_sql("financial_data", con=conn, if_exists="append", index=False, method="multi", chunksize=10)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 2531, in to_sql
    dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 460, in to_sql
    chunksize=chunksize, dtype=dtype, method=method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 1547, in to_sql
    table.insert(chunksize, method)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 686, in insert
    exec_insert(conn, keys, chunk_iter)
  File "C:\Users\jconstanzo\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\sql.py", line 609, in _execute_insert_multi
    conn.execute(self.table.insert(data))
TypeError: insert() takes exactly 2 arguments (1 given)

Code was run in: