Commit 188330a5 by Selah Clarity

various edits to data pulling tools

1 parent 9cfd5a48
@@ -15,6 +15,23 @@ conn = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu').connect()
class TestStuff(unittest.TestCase):
    # How to deal with this, I don't know!!
    def test_error_in_burris_meds_insert(self):
        datadir = "C:/Users/LynchSe/Documents/Data/Burris_Geobirth/"
        dfmed = pd.read_csv(datadir + "from_burris_lab/Copy of GeoBirth_med_counts_20210713.csv")
        sgids = dfmed.SIMPLE_GENERIC_C.unique()
        dfsgids_raw = pd.DataFrame({'SIMPLE_GENERIC_C': sgids})
        dfsgids = dfsgids_raw.loc[18:23]
        import bulk_insert
        table_def = '''
        SIMPLE_GENERIC_C INT
        '''
        # there were overflow issues
        bulk_insert.create_and_import(dfsgids, '##sgmedids', table_def, conn)
    def test_something(self):
        self.assertEqual(2+1, 3)
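For context on the overflow note above: a minimal sketch of one way the insert might be made to survive IDs wider than a 32-bit INT. The BIGINT column type and the int64 coercion are assumptions on my part, not something this commit establishes.

import pandas as pd
# Hypothetical IDs, one of which is too large for a 32-bit INT column (assumption)
dfsgids = pd.DataFrame({'SIMPLE_GENERIC_C': [123, 4567, 9999999999]})
dfsgids['SIMPLE_GENERIC_C'] = dfsgids['SIMPLE_GENERIC_C'].astype('int64')
table_def = '''
SIMPLE_GENERIC_C BIGINT
'''
# bulk_insert.create_and_import(dfsgids, '##sgmedids', table_def, conn)  # same call as in the test above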
@@ -38,6 +55,8 @@ class TestStuff(unittest.TestCase):
        HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc_short', table_def, conn, max_insert=200)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_burris_pull_enc(self):
        projdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Burris_Geobirth\\'
@@ -52,6 +71,7 @@ class TestStuff(unittest.TestCase):
        HOSPITAL VARCHAR(20)
        '''
        bulk_insert.create_and_import(df, '##test_burris_pull_enc', table_def, conn, max_insert=1000)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_integration_1(self):
@@ -67,6 +87,7 @@ class TestStuff(unittest.TestCase):
        conn = ctc.get_clarity_engine().connect()
        bulk_insert.create_and_import(dfc, '##cohort_sample2', tabledef, conn)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
    def test_integration_2(self):
@@ -77,15 +98,17 @@ class TestStuff(unittest.TestCase):
        import bulk_insert
        table_def = 'MRN VARCHAR(100)'
        bulk_insert.create_and_import(dfcohort_inp1[['mrn']], '##cohort_inp', table_def, conn, max_insert=1000)
        ## REMEMBER TO LOOK IN CLARITYDEV, NOT PROD
if __name__ == '__main__':
    # unittest.main()
    t = TestStuff()
    t.test_burris_pull_enc()
    t.test_burris_pull_enc_short()
    t.test_error_in_burris_meds_insert()
    # t.test_burris_pull_enc()
    # t.test_burris_pull_enc_short()
    # t.test_format_row_for_insert_nans()
    # t.test_something()
''''''
@@ -37,6 +37,8 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
    print("Running SQL from {}".format(sqlfilename))
    import time
    start = time.time()
    with open(sqlfilename, 'r') as sqlfile:
        sqltext = sqlfile.read()
    eng = get_clarity_engine()
@@ -44,7 +46,11 @@ def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
        clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
    else:
        with eng.connect() as sqalconn:
            clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
            clarity_to_csv_inner(sqltext, csvfilenames, sqalconn)
    end = time.time()
    duration = end - start
    print("Query ran and exported in {:.1f} s".format(duration))
def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
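For reference, a minimal usage sketch of clarity_to_csv as patched above, reusing the SQL and CSV file names that appear in the tests; the import path for the ctc module is an assumption. The caller can either let the function open its own connection or hand one in through dbconn, and the new timing print reports elapsed seconds in both cases.

import clarity_tools_selah as ctc  # module name assumed; the tests alias it as ctc

# Let clarity_to_csv open (and close) its own connection
ctc.clarity_to_csv('testCohort.sql', ['test1.csv'])

# Reuse one connection across several calls (the branch corrected above)
with ctc.get_clarity_engine().connect() as sqalconn:
    ctc.clarity_to_csv('testCohort.sql', ['test1.csv'], dbconn=sqalconn)
    ctc.clarity_to_csv('readTestCohort.sql', ['test2.csv'], dbconn=sqalconn)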
......
@@ -4,9 +4,8 @@ import pandas as pd
from unittest.mock import MagicMock
import sqlparse
#%%
testquerydir = 'C:\\Users\\LynchSe\\Documents\\Repos\\Covid19Related\\selah\\Clarity_Tools_Selah\\'
testquerydir = 'C:\\Users\\LynchSe\\Documents\\Repos\\rClarity_Tools_Selah\\clarity_to_csv_tests\\'
testdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Clarity_Tools_Selah\\'
@@ -17,17 +16,42 @@ def line_count(filename):
        return len(myfile.readlines())
def remove_files(filenamelist):
    import os
    import subprocess
    for filename in filenamelist:
        if filename is None:
            continue
        elif os.path.isfile(filename):
            try:
                subprocess.check_output('rm {}'.format(filename))
            except Exception as e:
                print(e)
                pass
        else:
            print("Skipping removal because not recognized as file - {}".format(filename))
class TestStuff(unittest.TestCase):
    def test_remove_file_not_there(self):
        # make sure removing a file that is not there does not raise an error
        remove_files(['poop.csv'])
    def test_integration_test(self):
        sqlfilename1 = testquerydir + "testCohort.sql"
        sqlfilename2 = testquerydir + "readTestCohort.sql"
        genericcsvs = [
            testdatadir + 'test1.csv',
            testdatadir + 'test2.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)
            ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)
    def test_comment_with_semicolon(self):
        sqltext = '''
        SELECT TOP 2 PAT_ID FROM PAT_ENC;
@@ -55,20 +79,8 @@ class TestStuff(unittest.TestCase):
    def integration_test(self):
        sqlfilename1 = testquerydir + "testCohort.sql"
        sqlfilename2 = testquerydir + "readTestCohort.sql"
        genericcsvs = [
            testdatadir + 'test1.csv',
            testdatadir + 'test2.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)
            ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)
    def unicode_error(self):
    def test_unicode_error(self):
        genericcsvs = [
            testdatadir + 'test_cohort.csv'
        ]
@@ -131,25 +143,22 @@ class TestStuff(unittest.TestCase):
    # TODO - deal with wrong number of CSVs supplied
#%%
if __name__ == '__main__':
    tests_to_run = [
        "test_comment_with_semicolon"
        # , "test_none_csv"
        # , "integration_test"
        # , "unicode_error"
        # , "test_simple"
        # , "test_wrapper"
        # , "test_cohort"
    ]
    suite = unittest.TestSuite()
    for test in tests_to_run:
        suite.addTest(TestStuff(test))
    runner = unittest.TextTestRunner()
    runner.run(suite)
    # t = TestStuff()
    # t.test_remove_file_not_there()
    # t.test_integration_test()
    # t.test_comment_with_semicolon()
    # t.test_none_csv()
    # t.test_unicode_error()
    # t.test_simple()
    # t.test_wrapper()
    # t.test_cohort()
    # unittest.main()
    unittest.main()
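The hand-built TestSuite above is one way to run a subset of tests; for comparison, a sketch of the equivalent selection using unittest's own entry points. The module name test_clarity_to_csv is assumed here, not taken from this diff.

# From the command line (module name assumed):
#   python -m unittest test_clarity_to_csv.TestStuff.test_comment_with_semicolon
# Or from the module's __main__ block:
import unittest
unittest.main(defaultTest='TestStuff.test_comment_with_semicolon')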
......