bulk_insert_test.py

import pandas as pd
import unittest
import sqlalchemy

import bulk_insert


import clarity_to_csv as ctc
conn = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu').connect()
#conn = ctc.get_clarity_engine(host='clarityprod.uphs.upenn.edu').connect()


#TODO - scramble this damn data so it isn't PHI

class TestStuff(unittest.TestCase):

    def test_something(self):
        self.assertEqual(2+1, 3)

    def test_integration_1(self):
        #  Integration test, desired workflow as of July 2021

        datadir = 'C:/Users/LynchSe/Documents/Data/Clarity_Tools_Selah/'
        dfp = pd.read_csv(datadir + "test_cohort_36559.csv")
        dfc = dfp[['PAT_ID', 'BIRTH_DATE', 'ENC_CNT_2017']]
        tabledef = '''PAT_ID VARCHAR(18) NOT NULL, BIRTH_DATE DATETIME, ENC_CNT_2017 INTEGER
        '''

        import clarity_to_csv as ctc
        conn = ctc.get_clarity_engine().connect()

        bulk_insert.create_and_import(dfc, '##cohort_sample2', tabledef, conn)


    def test_integration_2(self):
        datadir = 'C:/Users/LynchSe/Documents/Data/Clarity_Tools_Selah/'
        dfcohort_inp1 = pd.read_excel(datadir + 'geobirth_patient_list_2018_2021.xls')
        #TODO - scramble this data so it isn't PHI

        import bulk_insert
        table_def = 'MRN VARCHAR(100)'
        bulk_insert.create_and_import(dfcohort_inp1[['mrn']], '##cohort_inp', table_def, conn, max_insert=1000)


if __name__ == '__main__':

    unittest.main()
#    t = TestStuff()
#    t.test_something()