clarity_to_csv_test.py 5.89 KB
import unittest
import clarity_to_csv as ctc
import pandas as pd
from unittest.mock import MagicMock
import sqlparse


testquerydir = 'C:\\Users\\LynchSe\\Documents\\Repos\\rClarity_Tools_Selah\\clarity_to_csv_tests\\'
testdatadir = 'C:\\Users\\LynchSe\\Documents\\Data\\Clarity_Tools_Selah\\'


#TODO spin up a sqlite database here

def line_count(filename):
    with open(filename) as myfile:
        return len(myfile.readlines())

def remove_files(filenamelist):
    import os
    import subprocess
    for filename in filenamelist:
        if filename is None:
            next
        elif os.path.isfile(filename):
            try:
                subprocess.check_output('rm {}'.format(filename))  
            except Exception as e:
                print(e)  
                pass
        else :
            print("Skipping removal because not recognized as file - {}".format(filename))
            

class TestStuff(unittest.TestCase):
    
    def test_remove_file_not_there(self):
        #make it not make that stupid shitty error
        remove_files(['poop.csv'])
        
    
    def test_integration_test(self):
        sqlfilename1 = testquerydir + "testCohort.sql"
        sqlfilename2 = testquerydir + "readTestCohort.sql"
        genericcsvs = [
            testdatadir + 'test1.csv', 
            testdatadir + 'test2.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)      
            ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)      

    def test_save_to_dataframes(self):
        sqlfilename = testquerydir + "testCohort.sql"
        with ctc.get_clarity_engine().connect() as sqalconn:
            (df1, df2) = ctc.sqlfile_to_dfs(sqlfilename, sqalconn)
            self.assertEqual(len(df1),3)
            self.assertEqual(len(df2),2)
    
    def test_save_to_df_2col(self):
        sql_2col = '''SELECT TOP 10 PAT_ID, PAT_ENC_CSN_ID FROM PAT_ENC; 
                            SELECT TOP 5 CONTACT_DATE FROM PAT_ENC tablesample(0.01);
                            '''
        with ctc.get_clarity_engine().connect() as sqalconn:
            (df1, df2) = ctc.sqltext_to_dfs(sql_2col, sqalconn)
        self.assertEqual(len(df1),10)
        self.assertEqual(len(df2),5)

    
    def test_comment_with_semicolon(self):
        sqltext = '''
            SELECT TOP 2 PAT_ID FROM PAT_ENC;
            /*
            SELECT 1; SELECT 2;
            */
        '''
        genericcsvs = [
            testdatadir + 'top2_pat_enc.csv'
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv_inner(sqltext, genericcsvs, sqalconn)      

        
    def test_none_csv(self):
        sqlfilename = testquerydir + "testCohort.sql"
        genericcsvs = [
            None, #it should just skip this one
            testdatadir + 'two_empis.csv',
        ]
        remove_files(genericcsvs)
        with ctc.get_clarity_engine().connect() as sqalconn:
            ctc.clarity_to_csv(sqlfilename, genericcsvs, dbconn=sqalconn)      


    def test_unicode_error(self):
        genericcsvs = [
            testdatadir + 'test_cohort.csv'
        ]  
        remove_files(genericcsvs)
        #I think it hangs if I don't include 'IF EXISTS'. GRRR... why??
        sql_text = r'''
            SELECT * FROM 
            X_COVID19_LAB_ORDERS l
            WHERE PAT_ID IN ('055948350', '041356734', '057338121');
            '''
        with ctc.get_clarity_engine().connect() as conn:
            ctc.clarity_to_csv_inner(sql_text, genericcsvs, conn)


    def test_simple(self):
        genericcsvs = [
            testdatadir + 'test1.csv', 
            testdatadir + 'test2.csv'
        ]
        remove_files(genericcsvs)
        simple_sql_text = '''SELECT TOP 10 PAT_ID, PAT_ENC_CSN_ID FROM PAT_ENC; 
                            SELECT TOP 5 CONTACT_DATE FROM PAT_ENC tablesample(0.01);
                            '''
        with ctc.get_clarity_engine().connect() as conn:
            ctc.clarity_to_csv_inner(simple_sql_text, genericcsvs, conn)
        self.assertEqual(line_count( testdatadir + 'test1.csv'), 11)
        self.assertEqual(line_count( testdatadir + 'test2.csv'), 6)


    def test_wrapper(self):
        sqlfilename = testquerydir + "tinyTestQuery.sql"
        genericcsvs = [
            testdatadir + 'test1.csv', 
            testdatadir + 'test2.csv',
            testdatadir + 'test3.csv'
        ]
        remove_files(genericcsvs)
        ctc.clarity_to_csv(sqlfilename, genericcsvs)      


    #I believe this is hanging
    def test_cohort(self):
        genericcsvs = [
            testdatadir + 'test_cohort.csv'
        ]  
        remove_files(genericcsvs)
        #I think it hangs if I don't include 'IF EXISTS'. GRRR... why??
        sql_text = '''
            DROP TABLE IF EXISTS ##cohort;  
            CREATE TABLE ##cohort (EMPI VARCHAR(90) NOT NULL);
            INSERT INTO ##cohort (EMPI) values ('8001111117'),('1000000000');
            SELECT * FROM ##cohort;
            '''
        with ctc.get_clarity_engine().connect() as conn:
            ctc.clarity_to_csv_inner(sql_text, genericcsvs, conn)

        self.assertEqual(line_count(testdatadir + 'test_cohort.csv'), 3) #a header and two values



#TODO - deal with wrong number of csv's supplied

#%%
if __name__ == '__main__':

     t = TestStuff()
     t.test_save_to_dataframes()
     t.test_save_to_df_2col()
    # t.test_remove_file_not_there()
    # t.test_integration_test() 
    # t.test_comment_with_semicolon()    
    # t.test_none_csv()   
    # t.test_unicode_error()

    # t.test_simple()
    # t.test_wrapper()
    # t.test_cohort()


    # unittest.main()