clarity_to_csv.py
3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from sqlalchemy import create_engine
from getpass import getpass
import pymssql
import csv
import pandas as pd
#%% Andrew functions
def get_mssql_engine(
username="lynchse",
host="clarityprod.uphs.upenn.edu",
database="clarity_snapshot_db",
domain="UPHS",
port="1433",
timeout=7200,
password=None,
):
from sqlalchemy import create_engine
if password is None:
password = getpass("PW: ")
user = domain + "\\" + username
return create_engine(
f"mssql+pymssql://{user}:{password}@{host}:{port}/{database}?timeout={timeout}"
)
#%% My functions
selahcredsfilename = 'C:\\Users\\LynchSe\\Documents\\selah_clarity_credentials.txt'
def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600):
with open(credsfilename, 'r') as credsfile:
nameline = credsfile.readline().strip()
pwline = credsfile.readline().strip()
clarity_engine = get_mssql_engine(username=nameline, password=pwline, timeout=timeout)
return clarity_engine
def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
print("Running SQL from {}".format(sqlfilename))
with open(sqlfilename, 'r') as sqlfile:
sqltext = sqlfile.read()
eng = get_clarity_engine()
if dbconn:
clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
else:
with eng.connect() as sqalconn:
clarity_to_csv_inner(sqltext, csvfilenames, dbconn)
def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
import sqlparse
sqltext_cleaned = sqlparse.format(sqltext, strip_comments=True).strip()
sqlstatements = sqltext_cleaned.split(';')
if sqlstatements[-1].strip() == '':
sqlstatements.pop() # often there is a final semicolon leading to a empty last statement
which_statement = 0
which_csvfile = 0
for sqlstatement in sqlstatements:
if verbose == True:
print("\n\nProcessing statement: '{}'\n".format(sqlstatement.strip()))
#TODO - time the query execution time
results = sqalconn.execute(sqlstatement) #this is hanging?
if results.returns_rows == True: #ONLY DO THIS IFFFF there are >0 results rows
csvname = csvfilenames[which_csvfile]
which_csvfile += 1
if csvname:
print("Writing CSV file {}".format(csvname))
#TODO - time the file writing time
with open(csvname, 'w', newline='\n', encoding='utf-8') as csvfile:
line_count = 0
mycsvwriter = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#TODO - write column names
mycsvwriter.writerow(results.keys())
line_count += 1
for row in results:
# print("Line {}".format(line_count))
mycsvwriter.writerow(row)
line_count += 1
print("Done writing CSV file!")
which_statement += 1