cicTeam / Clarity_Tools_Selah
Commit 9cfd5a48 authored Jul 21, 2021 by Selah Clarity
Deal with nans in import data
1 parent 00038591
Showing 2 changed files with 55 additions and 4 deletions
bulk_insert.py
bulk_insert_test.py
bulk_insert.py @ 9cfd5a4
@@ -3,6 +3,7 @@
import pandas as pd
import numpy as np
import sqlalchemy

def create_table_sql(table_name, column_def):
...
@@ -19,10 +20,18 @@ def create_and_import(data, table_name, table_def, conn, max_insert=1000):
    cts = create_table_sql(table_name, table_def)
    conn.execute(cts)
    dtypes = get_dtypes_from_table_def(table_def)
    print(dtypes)
    insert_sql_generator = generate_insert_sql(table_name, data.columns, dtypes, data, max_insert=max_insert)
    fline = 0
    for insert_chunk_sql in insert_sql_generator:
        try:
            #TODO - handle this so long SQL insert doesn't obscure the screen
            conn.execute(insert_chunk_sql)
        # except sqlalchemy.exc.OperationalError as e:
        #     raise e
        except Exception as e:
            print("There was an exception during insert:\n" + e._message() + e.statement[:500] + '\n')
            print("Aborting create_and_import()")
            return False
    end = time.time()
    duration = end - start
    line_cnt = len(data)
...
@@ -51,7 +60,9 @@ def format_data_for_insert(rows, column_types):
        row_fmt = zip(row, column_types)
        items_fmttd = []
        for (item, fmt) in row_fmt:
            if item is np.nan:
                item_fmttd = 'NULL'
            elif (fmt == 'STR') | (fmt == 'DT'):
                item_fmttd = "'{}'".format(item)
            else:
                item_fmttd = "{}".format(item)
...
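Note on the NULL handling above: "item is np.nan" is an identity check, so it only matches the np.nan singleton object (which is what an object-dtype array built from np.nan holds). A NaN produced another way, such as float('nan') or pandas' NaT, would slip through and be rendered as the literal text nan. A possible hardening, shown here only as a sketch and not part of this commit, is to use pandas.isna(); the helper name format_item below is illustrative, not from the repository:

# Sketch only, not the committed code: pd.isna() catches np.nan, float('nan'),
# None and pandas.NaT, while "item is np.nan" matches only the np.nan object.
import numpy as np
import pandas as pd

def format_item(item, fmt):
    # Mirrors the NUM/STR/DT convention used in format_data_for_insert().
    if pd.isna(item):
        return 'NULL'
    if fmt in ('STR', 'DT'):
        return "'{}'".format(item)
    return "{}".format(item)

print(format_item(np.nan, 'STR'))         # -> NULL
print(format_item(float('nan'), 'NUM'))   # -> NULL
print(format_item('2020-08-21', 'DT'))    # -> '2020-08-21'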
bulk_insert_test.py @ 9cfd5a4
import pandas as pd
import numpy as np
import unittest
import sqlalchemy
...
@@ -17,6 +18,42 @@ class TestStuff(unittest.TestCase):
    def test_something(self):
        self.assertEqual(2 + 1, 3)

    def test_format_row_for_insert_nans(self):
        insert_chunk = np.array([[61, 'hello', '2020-08-21'], [np.NAN, np.NAN, np.NAN]], dtype='object')
        column_types = ['NUM', 'STR', 'DT']
        rows_of_data = bulk_insert.format_data_for_insert(insert_chunk, column_types)
        self.assertEqual(rows_of_data, "(61,'hello','2020-08-21'),\n(NULL,NULL,NULL)")

    def test_burris_pull_enc_short(self):
        projdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Burris_Geobirth\\'
        d = '2021_05_26'
        dfr = pd.read_csv(projdatadir + 'cohort_pat_delivery_{}.csv'.format(d))  # .sample(1000)
        df = dfr.iloc[30053:30069]
        import bulk_insert
        table_def = '''
            HUP_MRN VARCHAR(30),
            PAT_ID VARCHAR(18),
            DELIVERY_DATE DATETIME,
            HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc_short', table_def, conn, max_insert=200)

    def test_burris_pull_enc(self):
        projdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Burris_Geobirth\\'
        d = '2021_05_26'
        df = pd.read_csv(projdatadir + 'cohort_pat_delivery_{}.csv'.format(d))  # .sample(1000)
        import bulk_insert
        table_def = '''
            HUP_MRN VARCHAR(30),
            PAT_ID VARCHAR(18),
            DELIVERY_DATE DATETIME,
            HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc', table_def, conn, max_insert=1000)

    def test_integration_1(self):
        # Integration test, desired workflow as of July 2021
...
@@ -44,8 +81,11 @@ class TestStuff(unittest.TestCase):
if __name__ == '__main__':
    unittest.main()
    # t = TestStuff()
    # unittest.main()
    t = TestStuff()
    t.test_burris_pull_enc()
    t.test_burris_pull_enc_short()
    # t.test_format_row_for_insert_nans()
    # t.test_something()
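The two burris tests read a CSV from a path under C:\Users\LynchSe\... and use a conn object that is not created in any of the hunks shown here, so as written they only run on the author's machine. For reference, a self-contained sketch of the same call pattern follows; it assumes conn is a SQLAlchemy connection, uses an in-memory SQLite engine and a made-up DataFrame, and drops the SQL Server style ## temp-table prefix, so it illustrates the create_and_import API rather than reproducing these tests:

# Sketch only: assumes bulk_insert.create_and_import() accepts any SQLAlchemy
# connection and that the SQL it generates is portable enough for SQLite.
# The engine, DataFrame contents and table name below are made up.
import numpy as np
import pandas as pd
import sqlalchemy
import bulk_insert

engine = sqlalchemy.create_engine('sqlite://')     # throwaway in-memory database
df = pd.DataFrame({'HUP_MRN': ['123', np.nan],
                   'PAT_ID': ['Z1', 'Z2'],
                   'DELIVERY_DATE': ['2020-08-21', np.nan],
                   'HOSPITAL': ['HUP', np.nan]})
table_def = '''
    HUP_MRN VARCHAR(30),
    PAT_ID VARCHAR(18),
    DELIVERY_DATE DATETIME,
    HOSPITAL VARCHAR(20)
'''
with engine.connect() as conn:
    bulk_insert.create_and_import(df, 'test_delivery', table_def, conn, max_insert=200)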