bulk_insert_test.py 1.74 KB
import pandas as pd
import numpy as np

import bulk_insert

#%%
table_name = "##COHORT_BULK_INSERT_TEST"

test_data = [
      [578,'29389','2011-09-03'],
      [332,'11384','2011-09-07'],
      [372,'14487','2011-09-07'],
      [331,'41384','2011-09-07'],
      [931,'24587','2011-10-03']
      ]

df_test_data = pd.DataFrame(test_data)

#%%
table_column_def = '''
    PAT_ID VARCHAR(18) NOT NULL, 
    MRN VARCHAR(30) NOT NULL, 
    DELIVERY_DATE DATETIME NOT NULL'''

column_names = ["MRN","PAT_ID","DELIVERY_DATE"]
column_types = ['NUM','STR','DT']

#%%
import bulk_insert

print(bulk_insert.create_table_sql(table_name, table_column_def))
#%%
insert_sql_generator = bulk_insert.generate_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3)
print(next(insert_sql_generator))

#%%
print(next(insert_sql_generator))

#%%
#Dataframes should never be passed to this function
#print(bulk_insert.format_data_for_insert(df_test_data, ['NUM','STR','DT']))

print(bulk_insert.format_data_for_insert(test_data, ['NUM','STR','DT']))


#%%

print(bulk_insert.collect_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3))



#%% Test with clarity

import clarity_to_csv as ctc
#conn = ctc.get_clarity_engine().connect()

create_create_table_sql = """
    DROP TABLE IF EXISTS {table_name};
    CREATE TABLE {table_name} ({table_column_def});
    """.format(table_name=table_name, table_column_def=table_column_def)

import bulk_insert
insert_sql_generator = bulk_insert.generate_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3)

insert_sql1 = next(insert_sql_generator)
insert_sql2 = next(insert_sql_generator)
#%%
conn.execute(create_create_table_sql)
conn.execute(insert_sql1)
conn.execute(insert_sql2)