Commit 188330a5 by Selah Clarity

various edits to data pulling tools

1 parent 9cfd5a48
@@ -15,6 +15,23 @@ conn = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu').connect()
class TestStuff(unittest.TestCase):
    # How to deal with this, I don't know!!
    def test_error_in_burris_meds_insert(self):
        datadir = "C:/Users/LynchSe/Documents/Data/Burris_Geobirth/"
        dfmed = pd.read_csv(datadir + "from_burris_lab/Copy of GeoBirth_med_counts_20210713.csv")
        sgids = dfmed.SIMPLE_GENERIC_C.unique()
        dfsgids_raw = pd.DataFrame({'SIMPLE_GENERIC_C': sgids})
        dfsgids = dfsgids_raw.loc[18:23]
        import bulk_insert
        table_def = '''
        SIMPLE_GENERIC_C INT
        '''
        # there were overflow issues
        bulk_insert.create_and_import(dfsgids, '##sgmedids', table_def, conn)
    def test_something(self):
        self.assertEqual(2+1, 3)
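For context on the overflow note above: a minimal sketch of one way the insert might be made to survive IDs wider than a 32-bit INT. The BIGINT column type and the int64 coercion are assumptions on my part, not something this commit establishes.

import pandas as pd
# Hypothetical IDs, one of which is too large for a 32-bit INT column (assumption)
dfsgids = pd.DataFrame({'SIMPLE_GENERIC_C': [123, 4567, 9999999999]})
dfsgids['SIMPLE_GENERIC_C'] = dfsgids['SIMPLE_GENERIC_C'].astype('int64')
table_def = '''
SIMPLE_GENERIC_C BIGINT
'''
# bulk_insert.create_and_import(dfsgids, '##sgmedids', table_def, conn)  # same call as in the test above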
@@ -38,6 +55,8 @@ class TestStuff(unittest.TestCase):
        HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc_short', table_def, conn, max_insert=200)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_burris_pull_enc(self):
        projdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Burris_Geobirth\\'
@@ -52,6 +71,7 @@ class TestStuff(unittest.TestCase):
        HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc', table_def, conn, max_insert=1000)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_integration_1(self):
@@ -67,6 +87,7 @@ class TestStuff(unittest.TestCase):
        conn = ctc.get_clarity_engine().connect()
        bulk_insert.create_and_import(dfc, '##cohort_sample2', tabledef, conn)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_integration_2(self):
@@ -77,15 +98,17 @@ class TestStuff(unittest.TestCase):
        import bulk_insert
        table_def = 'MRN VARCHAR(100)'
        bulk_insert.create_and_import(dfcohort_inp1[['mrn']], '##cohort_inp', table_def, conn, max_insert=1000)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
if __name__ == '__main__':
    # unittest.main()
    t = TestStuff()
    t.test_burris_pull_enc()
    t.test_burris_pull_enc_short()
    t.test_error_in_burris_meds_insert()
    # t.test_burris_pull_enc()
    # t.test_burris_pull_enc_short()
    # t.test_format_row_for_insert_nans()
    # t.test_something()
''''''
@@ -37,6 +37,8 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
    print("Running SQL from {}".format(sqlfilename))
    import time
    start = time.time()
    with open(sqlfilename, 'r') as sqlfile:
        sqltext = sqlfile.read()
    eng = get_clarity_engine()
@@ -44,7 +46,11 @@ def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
        clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
    else:
        with eng.connect() as sqalconn:
            clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
            clarity_to_csv_inner(sqltext, csvfilenames, sqalconn)
    end = time.time()
    duration = end - start
    print("Query ran and exported in {:.1f} s".format(duration))
def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
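For reference, a minimal usage sketch of clarity_to_csv as patched above, reusing the SQL and CSV file names that appear in the tests; the import path for the ctc module is an assumption. The caller can either let the function open its own connection or hand one in through dbconn, and the new timing print reports elapsed seconds in both cases.

import clarity_tools_selah as ctc  # module name assumed; the tests alias it as ctc

# Let clarity_to_csv open (and close) its own connection
ctc.clarity_to_csv('testCohort.sql', ['test1.csv'])

# Reuse one connection across several calls (the branch corrected above)
with ctc.get_clarity_engine().connect() as sqalconn:
    ctc.clarity_to_csv('testCohort.sql', ['test1.csv'], dbconn=sqalconn)
    ctc.clarity_to_csv('readTestCohort.sql', ['test2.csv'], dbconn=sqalconn)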
......
@@ -4,9 +4,8 @@ import pandas as pd
from unittest.mock import MagicMock
import sqlparse
#%%
testquerydir = 'C:\\Users\\LynchSe\\Documents\\Repos\\Covid19Related\\selah\\Clarity_Tools_Selah\\'
testquerydir = 'C:\\Users\\LynchSe\\Documents\\Repos\\rClarity_Tools_Selah\\clarity_to_csv_tests\\'
testdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Clarity_Tools_Selah\\'
@@ -17,17 +16,42 @@ def line_count(filename):
        return len(myfile.readlines())
def remove_files(filenamelist):
    import os
    import subprocess
    for filename in filenamelist:
        if filename is None:
            continue
        elif os.path.isfile(filename):
            try:
                subprocess.check_output('rm {}'.format(filename))
            except Exception as e:
                print(e)
                pass
        else:
            print("Skipping removal because not recognized as file - {}".format(filename))
class TestStuff(unittest.TestCase):
    def test_remove_file_not_there(self):
        # make sure removing a file that is not there does not raise an error
        remove_files(['poop.csv'])
    def test_integration_test(self):
        sqlfilename1 = testquerydir + "testCohort.sql"
        sqlfilename2 = testquerydir + "readTestCohort.sql"
        genericcsvs = [
            testdatadir + 'test1.csv',
            testdatadir + 'test2.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)
            ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)
    def test_comment_with_semicolon(self):
        sqltext = '''
        SELECT TOP 2 PAT_ID FROM PAT_ENC;
@@ -55,20 +79,8 @@ class TestStuff(unittest.TestCase):
    def integration_test(self):
        sqlfilename1 = testquerydir + "testCohort.sql"
        sqlfilename2 = testquerydir + "readTestCohort.sql"
        genericcsvs = [
            testdatadir + 'test1.csv',
            testdatadir + 'test2.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)
            ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)
    def unicode_error(self):
    def test_unicode_error(self):
        genericcsvs = [
            testdatadir + 'test_cohort.csv'
        ]
@@ -131,25 +143,22 @@ class TestStuff(unittest.TestCase):
    # TODO - deal with wrong number of CSVs supplied
#%%
if __name__ == '__main__':
    tests_to_run = [
        "test_comment_with_semicolon"
        # , "test_none_csv"
        # , "integration_test"
        # , "unicode_error"
        # , "test_simple"
        # , "test_wrapper"
        # , "test_cohort"
    ]
    suite = unittest.TestSuite()
    for test in tests_to_run:
        suite.addTest(TestStuff(test))
    runner = unittest.TextTestRunner()
    runner.run(suite)
    # t = TestStuff()
    # t.test_remove_file_not_there()
    # t.test_integration_test()
    # t.test_comment_with_semicolon()
    # t.test_none_csv()
    # t.test_unicode_error()
    # t.test_simple()
    # t.test_wrapper()
    # t.test_cohort()
    # unittest.main()
    unittest.main()
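The hand-built TestSuite above is one way to run a subset of tests; for comparison, a sketch of the equivalent selection using unittest's own entry points. The module name test_clarity_to_csv is assumed here, not taken from this diff.

# From the command line (module name assumed):
#   python -m unittest test_clarity_to_csv.TestStuff.test_comment_with_semicolon
# Or from the module's __main__ block:
import unittest
unittest.main(defaultTest='TestStuff.test_comment_with_semicolon')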
......