Skip to content
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation
This project
Loading...
Sign in
cicTeam
/
Clarity_Tools_Selah
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit 188330a5
authored
Jul 30, 2021
by
Selah Clarity
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
various edits to data pulling tools
1 parent
9cfd5a48
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
82 additions
and
44 deletions
bulk_insert_test.py
clarity_to_csv.py
clarity_to_csv_test.py
bulk_insert_test.py
View file @
188330a
...
...
@@ -15,6 +15,23 @@ conn = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu').connect()
class
TestStuff
(
unittest
.
TestCase
):
#How to deal with, I don't know!!
def test_error_in_burris_meds_insert(self):
    """Bulk-insert a slice of distinct SIMPLE_GENERIC_C values into ##sgmedids.

    Reads the med-count CSV from a hard-coded local path, keeps the unique
    SIMPLE_GENERIC_C values, and passes index labels 18 through 23
    (``.loc`` slicing is end-inclusive) to ``bulk_insert.create_and_import``
    over the module-level ``conn``.
    """
    datadir = "C:/Users/LynchSe/Documents/Data/Burris_Geobirth/"
    med_counts = pd.read_csv(
        datadir + "from_burris_lab/Copy of GeoBirth_med_counts_20210713.csv"
    )
    unique_ids = pd.DataFrame(
        {'SIMPLE_GENERIC_C': med_counts.SIMPLE_GENERIC_C.unique()}
    )
    id_slice = unique_ids.loc[18:23]

    import bulk_insert

    # NOTE(review): the original author recorded "overflow issues" around this
    # insert; presumably some ids do not fit the INT column -- confirm.
    table_def = '''
SIMPLE_GENERIC_C INT
'''
    bulk_insert.create_and_import(id_slice, '##sgmedids', table_def, conn)
def
test_something
(
self
):
self
.
assertEqual
(
2
+
1
,
3
)
...
...
@@ -38,6 +55,8 @@ class TestStuff(unittest.TestCase):
HOSPITAL VARCHAR(20)
'''
bulk_insert
.
create_and_import
(
df
,
'##test_burris_pull_enc_short'
,
table_def
,
conn
,
max_insert
=
200
)
## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
def
test_burris_pull_enc
(
self
):
projdatadir
=
'C:
\\
Users
\\
LynchSe
\\
Documents
\\
Data
\\
Burris_Geobirth
\\
'
...
...
@@ -52,6 +71,7 @@ class TestStuff(unittest.TestCase):
HOSPITAL VARCHAR(20)
'''
bulk_insert
.
create_and_import
(
df
,
'##test_burris_pull_enc'
,
table_def
,
conn
,
max_insert
=
1000
)
## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
def
test_integration_1
(
self
):
...
...
@@ -67,6 +87,7 @@ class TestStuff(unittest.TestCase):
conn
=
ctc
.
get_clarity_engine
()
.
connect
()
bulk_insert
.
create_and_import
(
dfc
,
'##cohort_sample2'
,
tabledef
,
conn
)
## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
def
test_integration_2
(
self
):
...
...
@@ -77,15 +98,17 @@ class TestStuff(unittest.TestCase):
import
bulk_insert
table_def
=
'MRN VARCHAR(100)'
bulk_insert
.
create_and_import
(
dfcohort_inp1
[[
'mrn'
]],
'##cohort_inp'
,
table_def
,
conn
,
max_insert
=
1000
)
## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
if
__name__
==
'__main__'
:
# unittest.main()
t
=
TestStuff
()
t
.
test_burris_pull_enc
()
t
.
test_burris_pull_enc_short
()
t
.
test_error_in_burris_meds_insert
()
# t.test_burris_pull_enc()
# t.test_burris_pull_enc_short()
# t.test_format_row_for_insert_nans()
# t.test_something()
''''''
clarity_to_csv.py
View file @
188330a
...
...
@@ -37,6 +37,8 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
def
clarity_to_csv
(
sqlfilename
,
csvfilenames
,
dbconn
=
None
):
print
(
"Running SQL from {}"
.
format
(
sqlfilename
))
import
time
start
=
time
.
time
()
with
open
(
sqlfilename
,
'r'
)
as
sqlfile
:
sqltext
=
sqlfile
.
read
()
eng
=
get_clarity_engine
()
...
...
@@ -44,7 +46,11 @@ def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
dbconn
)
else
:
with
eng
.
connect
()
as
sqalconn
:
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
dbconn
)
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
sqalconn
)
end
=
time
.
time
()
duration
=
end
-
start
print
(
"Query ran and exported in {:.1f} s"
.
format
(
duration
))
def
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
sqalconn
,
verbose
=
False
):
...
...
clarity_to_csv_test.py
View file @
188330a
...
...
@@ -4,9 +4,8 @@ import pandas as pd
from
unittest.mock
import
MagicMock
import
sqlparse
#%%
testquerydir
=
'C:
\\
Users
\\
LynchSe
\\
Documents
\\
Repos
\\
Covid19Related
\\
selah
\\
Clarity_Tools_Selah
\\
'
testquerydir
=
'C:
\\
Users
\\
LynchSe
\\
Documents
\\
Repos
\\
rClarity_Tools_Selah
\\
clarity_to_csv_tests
\\
'
testdatadir
=
'C:
\\
Users
\\
LynchSe
\\
Documents
\\
Data
\\
Clarity_Tools_Selah
\\
'
...
...
@@ -17,17 +16,42 @@ def line_count(filename):
return
len
(
myfile
.
readlines
())
def remove_files(filenamelist):
    """Delete every existing regular file named in ``filenamelist``.

    Parameters
    ----------
    filenamelist : iterable of (str or None)
        Paths to delete. ``None`` entries are skipped silently.

    Returns
    -------
    None

    Notes
    -----
    Entries that are not existing regular files (missing paths,
    directories) are reported with a message and left untouched. A failure
    while deleting one file is printed instead of raised, so the remaining
    entries are still processed.
    """
    import os

    for filename in filenamelist:
        if filename is None:
            continue
        if not os.path.isfile(filename):
            print("Skipping removal because not recognized as file - {}".format(filename))
            continue
        try:
            # os.remove is portable and handles spaces in the path; spawning
            # 'rm <file>' as one string without a shell never finds an
            # executable, so the old call only ever printed an error.
            os.remove(filename)
        except OSError as e:
            print(e)
class
TestStuff
(
unittest
.
TestCase
):
def test_remove_file_not_there(self):
    """Call remove_files on a path that is not expected to exist."""
    # The point is that a missing file should not produce an error.
    absent = ['poop.csv']
    remove_files(absent)
def test_integration_test(self):
    """Run testCohort.sql, then readTestCohort.sql, over one shared connection.

    The two target CSVs are cleared first with ``remove_files``. Both
    queries are sent through ``ctc.clarity_to_csv`` with the same ``dbconn``.
    """
    out_csvs = [testdatadir + name for name in ('test1.csv', 'test2.csv')]
    sql_paths = (
        testquerydir + "testCohort.sql",
        testquerydir + "readTestCohort.sql",
    )
    remove_files(out_csvs)

    # NOTE(review): presumably the second query reads state left on the
    # connection by the first, hence the shared connection -- confirm.
    engine = ctc.get_clarity_engine()
    with engine.connect() as shared_conn:
        for sql_path in sql_paths:
            ctc.clarity_to_csv(sql_path, out_csvs, dbconn=shared_conn)
def
test_comment_with_semicolon
(
self
):
sqltext
=
'''
SELECT TOP 2 PAT_ID FROM PAT_ENC;
...
...
@@ -55,20 +79,8 @@ class TestStuff(unittest.TestCase):
def integration_test(self):
    """Run testCohort.sql and readTestCohort.sql back to back on one connection.

    Not prefixed with ``test``, so unittest discovery will not collect it;
    it has to be called by name.
    """
    first_sql = testquerydir + "testCohort.sql"
    second_sql = testquerydir + "readTestCohort.sql"

    targets = []
    for csv_name in ('test1.csv', 'test2.csv'):
        targets.append(testdatadir + csv_name)

    # Clear any CSVs left over from an earlier run before exporting again.
    remove_files(targets)

    with ctc.get_clarity_engine().connect() as sqalconn:
        ctc.clarity_to_csv(first_sql, targets, dbconn=sqalconn)
        ctc.clarity_to_csv(second_sql, targets, dbconn=sqalconn)
def
unicode_error
(
self
):
def
test_unicode_error
(
self
):
genericcsvs
=
[
testdatadir
+
'test_cohort.csv'
]
...
...
@@ -131,25 +143,22 @@ class TestStuff(unittest.TestCase):
#TODO - deal with wrong number of csv's supplied
#%%
if
__name__
==
'__main__'
:
tests_to_run
=
[
"test_comment_with_semicolon"
# , "test_none_csv"
# , "integration_test"
# , "unicode_error"
# , "test_simple"
# , "test_wrapper"
# , "test_cohort"
]
suite
=
unittest
.
TestSuite
()
for
test
in
tests_to_run
:
suite
.
addTest
(
TestStuff
(
test
))
runner
=
unittest
.
TextTestRunner
()
runner
.
run
(
suite
)
# unittest.main()
# t = TestStuff()
# t.test_remove_file_not_there()
# t.test_integration_test()
# t.test_comment_with_semicolon()
# t.test_none_csv()
# t.test_unicode_error()
# t.test_simple()
# t.test_wrapper()
# t.test_cohort()
unittest
.
main
()
...
...
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment