bulk_insert_test.py
1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
import numpy as np
import bulk_insert
#%%
table_name = "##COHORT_BULK_INSERT_TEST"
test_data = [
[578,'29389','2011-09-03'],
[332,'11384','2011-09-07'],
[372,'14487','2011-09-07'],
[331,'41384','2011-09-07'],
[931,'24587','2011-10-03']
]
df_test_data = pd.DataFrame(test_data)
#%%
table_column_def = '''
PAT_ID VARCHAR(18) NOT NULL,
MRN VARCHAR(30) NOT NULL,
DELIVERY_DATE DATETIME NOT NULL'''
column_names = ["MRN","PAT_ID","DELIVERY_DATE"]
column_types = ['NUM','STR','DT']
#%%
import bulk_insert
print(bulk_insert.create_table_sql(table_name, table_column_def))
#%%
insert_sql_generator = bulk_insert.generate_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3)
print(next(insert_sql_generator))
#%%
print(next(insert_sql_generator))
#%%
#Dataframes should never be passed to this function
#print(bulk_insert.format_data_for_insert(df_test_data, ['NUM','STR','DT']))
print(bulk_insert.format_data_for_insert(test_data, ['NUM','STR','DT']))
#%%
print(bulk_insert.collect_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3))
#%% Test with clarity
import clarity_to_csv as ctc
#conn = ctc.get_clarity_engine().connect()
create_create_table_sql = """
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name} ({table_column_def});
""".format(table_name=table_name, table_column_def=table_column_def)
import bulk_insert
insert_sql_generator = bulk_insert.generate_insert_sql(table_name, column_names, column_types, df_test_data, max_insert=3)
insert_sql1 = next(insert_sql_generator)
insert_sql2 = next(insert_sql_generator)
#%%
conn.execute(create_create_table_sql)
conn.execute(insert_sql1)
conn.execute(insert_sql2)