clarity_to_csv.py 3.13 KB
from sqlalchemy import create_engine
from getpass import getpass
import pymssql
import csv
import pandas as pd

#%% Andrew functions

def get_mssql_engine(
    username="lynchse",
    host="clarityprod.uphs.upenn.edu",
    database="clarity_snapshot_db",
    domain="UPHS",
    port="1433",
    timeout=7200,
    password=None,
):
    from sqlalchemy import create_engine

    if password is None:
        password = getpass("PW: ")
    user = domain + "\\" + username
    return create_engine(
        f"mssql+pymssql://{user}:{password}@{host}:{port}/{database}?timeout={timeout}"
    )

#%% My functions

selahcredsfilename = 'C:\\Users\\LynchSe\\Documents\\selah_clarity_credentials.txt'
def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600):
        with open(credsfilename, 'r') as credsfile:
            nameline = credsfile.readline().strip()
            pwline = credsfile.readline().strip()
            clarity_engine = get_mssql_engine(username=nameline, password=pwline, timeout=timeout)
            return clarity_engine

def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
    print("Running SQL from {}".format(sqlfilename))
    with open(sqlfilename, 'r') as sqlfile:
        sqltext = sqlfile.read()
        eng = get_clarity_engine()
        if dbconn:
            clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
        else:
            with eng.connect() as sqalconn:
                clarity_to_csv_inner(sqltext, csvfilenames, dbconn)


def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
    import sqlparse
    sqltext_cleaned = sqlparse.format(sqltext, strip_comments=True).strip()
    sqlstatements = sqltext_cleaned.split(';')
        
    if sqlstatements[-1].strip() == '':
        sqlstatements.pop() # often there is a final semicolon leading to a empty last statement
    which_statement = 0
    which_csvfile = 0
    for sqlstatement in sqlstatements:
        if verbose == True:
            print("\n\nProcessing statement: '{}'\n".format(sqlstatement.strip()))
        #TODO - time the query execution time
        results = sqalconn.execute(sqlstatement) #this is hanging?          
        if results.returns_rows == True: #ONLY DO THIS IFFFF there are >0 results rows
            csvname = csvfilenames[which_csvfile]
            which_csvfile += 1
            if csvname:
                print("Writing CSV file {}".format(csvname))
                #TODO - time the file writing time
                with open(csvname, 'w', newline='\n', encoding='utf-8') as csvfile:
                    line_count = 0
                    mycsvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                    #TODO - write column names
                    mycsvwriter.writerow(results.keys())
                    line_count += 1
                    for row in results:
                       # print("Line {}".format(line_count))
                        mycsvwriter.writerow(row)
                        line_count += 1
                print("Done writing CSV file!")
        which_statement += 1