Commit 384cd839 by Selah Clarity

modify sql in sqlalchemy! woo hoo

1 parent 643d13e5
...@@ -27,11 +27,13 @@ def get_mssql_engine( ...@@ -27,11 +27,13 @@ def get_mssql_engine(
#%% My functions #%% My functions
selahcredsfilename = 'C:\\Users\\LynchSe\\Documents\\selah_clarity_credentials.txt' selahcredsfilename = 'C:\\Users\\LynchSe\\Documents\\selah_clarity_credentials.txt'
def get_clarity_engine(credsfilename = selahcredsfilename, **kwargs):
    """Build a database engine for Clarity from a credentials file.

    The credentials file is read as two lines: the username on the first
    line and the password on the second, each stripped of surrounding
    whitespace.

    Parameters
    ----------
    credsfilename : path of the credentials file; defaults to
        ``selahcredsfilename``.
    **kwargs : forwarded unchanged to ``get_mssql_engine`` (callers in this
        commit pass ``host=`` and ``database=``).

    Returns
    -------
    Whatever ``get_mssql_engine`` returns.
    """
    with open(credsfilename, 'r') as credsfile:
        nameline = credsfile.readline().strip()
        pwline = credsfile.readline().strip()
    # ``kwargs`` is a dict here; ``**kwargs`` unpacks it back into keyword
    # arguments, so every extra option reaches get_mssql_engine as given.
    clarity_engine = get_mssql_engine(username=nameline, password=pwline, **kwargs)
    return clarity_engine
...@@ -54,9 +56,19 @@ def sqltext_to_dfs(sqltext, dbconn): ...@@ -54,9 +56,19 @@ def sqltext_to_dfs(sqltext, dbconn):
def sqlfile_to_dfs(sqlfilename, dbconn):
    """Run the SQL stored in a file and return the resulting dataframes.

    Reads the whole file, hands the text to ``sqltext_to_dfs`` together with
    ``dbconn``, and prints a start message plus a completion message that
    includes the current local time and the elapsed seconds.

    Parameters
    ----------
    sqlfilename : path of the SQL file to read.
    dbconn : database connection passed through to ``sqltext_to_dfs``.

    Returns
    -------
    Whatever ``sqltext_to_dfs`` returns.
    """
    import time

    print("Running SQL from {}".format(sqlfilename))
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments while a long query is running.
    start = time.perf_counter()
    with open(sqlfilename, 'r') as sqlfile:
        sqltext = sqlfile.read()
    dfs = sqltext_to_dfs(sqltext, dbconn)
    duration = time.perf_counter() - start
    dtstr = time.strftime('%a %I:%M%p %Y-%m-%d')
    print("Query ran and exported {} in {:.1f} s".format(dtstr, duration))
    return dfs
......
...@@ -33,11 +33,17 @@ def remove_files(filenamelist): ...@@ -33,11 +33,17 @@ def remove_files(filenamelist):
class TestStuff(unittest.TestCase): class TestStuff(unittest.TestCase):
def test_remove_file_not_there(self): def __init__(self, *args, **kwargs):
#make it not make that stupid shitty error super(TestStuff, self).__init__(*args, **kwargs)
remove_files(['poop.csv']) self.eng = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu', database='clarity')
self.conn = self.eng.connect()
def __del__(self):
self.conn.close()
#TODO - finish converting all connections to the class connection
# except for test_integration_test and test_both_connections
def test_integration_test(self): def test_integration_test(self):
sqlfilename1 = testquerydir + "testCohort.sql" sqlfilename1 = testquerydir + "testCohort.sql"
sqlfilename2 = testquerydir + "readTestCohort.sql" sqlfilename2 = testquerydir + "readTestCohort.sql"
...@@ -50,19 +56,28 @@ class TestStuff(unittest.TestCase): ...@@ -50,19 +56,28 @@ class TestStuff(unittest.TestCase):
ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn) ctc.clarity_to_csv(sqlfilename1, genericcsvs, dbconn=sqalconn)
ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn) ctc.clarity_to_csv(sqlfilename2, genericcsvs, dbconn=sqalconn)
def test_both_connections(self):
with ctc.get_clarity_engine(host='clarityprod.uphs.upenn.edu', database='clarity_snapshot_db').connect() as connprod:
connprod.execute("SELECT 1;")
with ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu', database='clarity').connect() as conndev:
conndev.execute("SELECT 1;")
def test_remove_file_not_there(self):
#make it not make that stupid shitty error
remove_files(['poop.csv'])
def test_save_to_dataframes(self): def test_save_to_dataframes(self):
sqlfilename = testquerydir + "testCohort.sql" sqlfilename = testquerydir + "testCohort.sql"
with ctc.get_clarity_engine().connect() as sqalconn: (df1, df2) = ctc.sqlfile_to_dfs(sqlfilename, self.conn)
(df1, df2) = ctc.sqlfile_to_dfs(sqlfilename, sqalconn) self.assertEqual(len(df1),3)
self.assertEqual(len(df1),3) self.assertEqual(len(df2),2)
self.assertEqual(len(df2),2)
def test_save_to_df_2col(self): def test_save_to_df_2col(self):
sql_2col = '''SELECT TOP 10 PAT_ID, PAT_ENC_CSN_ID FROM PAT_ENC; sql_2col = '''SELECT TOP 10 PAT_ID, PAT_ENC_CSN_ID FROM PAT_ENC;
SELECT TOP 5 CONTACT_DATE FROM PAT_ENC tablesample(0.01); SELECT TOP 5 CONTACT_DATE FROM PAT_ENC tablesample(0.01);
''' '''
with ctc.get_clarity_engine().connect() as sqalconn: (df1, df2) = ctc.sqltext_to_dfs(sql_2col, self.conn)
(df1, df2) = ctc.sqltext_to_dfs(sql_2col, sqalconn)
self.assertEqual(len(df1),10) self.assertEqual(len(df1),10)
self.assertEqual(len(df2),5) self.assertEqual(len(df2),5)
...@@ -78,9 +93,7 @@ class TestStuff(unittest.TestCase): ...@@ -78,9 +93,7 @@ class TestStuff(unittest.TestCase):
testdatadir + 'top2_pat_enc.csv' testdatadir + 'top2_pat_enc.csv'
] ]
remove_files(genericcsvs) remove_files(genericcsvs)
with ctc.get_clarity_engine().connect() as sqalconn: ctc.clarity_to_csv_inner(sqltext, genericcsvs, self.conn)
ctc.clarity_to_csv_inner(sqltext, genericcsvs, sqalconn)
def test_none_csv(self): def test_none_csv(self):
sqlfilename = testquerydir + "testCohort.sql" sqlfilename = testquerydir + "testCohort.sql"
...@@ -159,9 +172,10 @@ class TestStuff(unittest.TestCase): ...@@ -159,9 +172,10 @@ class TestStuff(unittest.TestCase):
#%% #%%
if __name__ == '__main__': if __name__ == '__main__':
t = TestStuff() # t = TestStuff()
t.test_save_to_dataframes() # t.test_connections()
t.test_save_to_df_2col() # t.test_save_to_dataframes()
# t.test_save_to_df_2col()
# t.test_remove_file_not_there() # t.test_remove_file_not_there()
# t.test_integration_test() # t.test_integration_test()
# t.test_comment_with_semicolon() # t.test_comment_with_semicolon()
...@@ -173,7 +187,7 @@ if __name__ == '__main__': ...@@ -173,7 +187,7 @@ if __name__ == '__main__':
# t.test_cohort() # t.test_cohort()
# unittest.main() unittest.main()
......
#kwargs play
def use_values(a=0, b=0):
    """Print both values; a scratch helper for trying out ``**kwargs`` unpacking.

    Parameters
    ----------
    a : first value to print (default 0).
    b : second value to print (default 0).
    """
    print("first val {} second val {}".format(a, b))
#%%
#can I use this as input to above function?
values = {'a':3, 'b':7}
#%%
use_values(8,9)
use_values(values['a'], values['b'])
use_values(**values)
#modify sql using sqlalchemy
import pandas as pd
import sqlalchemy
sqlalchemy.__version__
#%%
import clarity_to_csv as ctc
eng_clarity = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu', database='clarity')
conn = eng_clarity.connect()
#%%
metadata_obj = sqlalchemy.MetaData()
#%% It works when we treat TextualSelect as a subquery!!!
# Goal of this cell: add a WHERE filter to a hand-written SQL string without
# editing the string itself.
# 1. Wrap the raw SQL in a text() construct.
sql_text = sqlalchemy.text("SELECT PAT_ID, COUNT(1) ENC_CNT FROM PAT_ENC GROUP BY PAT_ID")
# 2. Declare, by name, the columns the raw SQL returns.
txtsel = sql_text.columns(sqlalchemy.sql.column('PAT_ID'), sqlalchemy.sql.column('ENC_CNT'))
#txtsel = sql_text.columns(sqlalchemy.sql.column('PAT_ID'))
# 3. Turn it into a subquery so the declared columns are reachable via sq.c.
sq = txtsel.subquery()
# 4. Select from the subquery and filter on one of the declared columns.
selobj = sqlalchemy.select(sq).where(sq.c.PAT_ID == 'Z3153680')
# Run the composed statement directly, then load the same statement with pandas.
res = conn.execute(selobj)
res.keys()
df = pd.read_sql(selobj, con=conn)
#%% What is the from clause method??
sqlalchemy.select(txtsel)
sqlalchemy.select(sqlalchemy.sql.column('PAT_ID')).select_from(txtsel)
sqlalchemy.select_from(txtsel)
#%% Posted on Stack Overflow but I'm pretty sure it's not the answer
sql_text = sqlalchemy.text("SELECT * FROM PAT_ENC ORDER BY PAT_ID")
mytab = sqlalchemy.Table(sql_text, metadata_obj, autoload_with=eng_clarity)
stmt = sqlalchemy.select(mytab).where(mytab.c.PAT_ID == 5)
conn.execute(stmt)
import pandas as pd
import sqlalchemy
import clarity_to_csv as ctc
sqlalchemy.__version__
#%%
sqlite_db_filepath = "C:\\Users\\LynchSe\\Documents\\Data\\database.db"
e = sqlalchemy.create_engine('sqlite:///{}'.format(sqlite_db_filepath))
conn = e.connect()
#%%
eng_clarity = ctc.get_clarity_engine(host='claritydev.uphs.upenn.edu', database='clarity')
conn = eng_clarity.connect()
#%% Try out bound parameters
#https://docs.sqlalchemy.org/en/14/tutorial/dbapi_transactions.html#tutorial-working-with-transactions
from sqlalchemy import text
sql = text('''SELECT * FROM PAT_ENC
WHERE :begin_date <= CONTACT_DATE AND CONTACT_DATE < :end_date
''')
params = {}
params['begin_date'] = pd.to_datetime('2021-10-06') #I think contact date does not have time precision
params['end_date'] = pd.to_datetime('2021-10-07')
df = pd.read_sql(sql, con=conn, params=params)
#%% Try out bound parameters with a list
# A list bound to an IN clause must be declared as an "expanding" bind
# parameter; SQLAlchemy then renders one placeholder per list element at
# execution time instead of sending the list as a single scalar value.
sql = text('''SELECT * FROM PAT_ENC
WHERE PAT_ID IN :pat_ids
''').bindparams(sqlalchemy.bindparam('pat_ids', expanding=True))
params = {'pat_ids': ['Z3153680', '042511758']}
# Variant 1: pass the values at execution time.
df2 = pd.read_sql(sql, con=conn, params=params)
#%%
# Variant 2: bake the values into the statement itself, then run that
# statement (the pre-bound one, so no params argument is needed).
sqlwp = sql.bindparams(**params)
df3 = pd.read_sql(sqlwp, con=conn)
#%% Reflect tables from database into metadata object that
#apparently is necessary for SQLAlchemy expression language
#https://docs.sqlalchemy.org/en/14/tutorial/metadata.html#tutorial-working-with-metadata
from sqlalchemy import MetaData
metadata_obj = MetaData()
pat_enc = sqlalchemy.Table("pat_enc", metadata_obj, autoload_with=eng_clarity)
list(pat_enc.columns)
patient = sqlalchemy.Table("patient", metadata_obj, autoload_with=eng_clarity)
list(patient.columns)
###quote from docs
#To start using the SQLAlchemy Expression Language, we will want to have Table objects constructed that represent all of the database tables we are interested in working with.
#Each Table may be declared, meaning we explicitly spell out in source code what the table looks like,
#or may be reflected, which means we generate the object based on what’s already present in a particular database.
#%% Make some select statements using the metadata objects
sqlasql = sqlalchemy.select(patient.c.PAT_ID, patient.c.ZIP).where(patient.c.PAT_ID.in_(['Z3153680','042511758']))
df4 = pd.read_sql(sqlasql, con=conn)
#%% Can I make a select or a table out of a text construct?
# Doesn't seem like it??
sql_text = sqlalchemy.text("SELECT * FROM PAT_ENC ORDER BY PAT_ID")
mytab = sqlalchemy.Table(sql_text, metadata_obj, autoload_with=eng_clarity)
stmt = sqlalchemy.select(mytab).where(mytab.c.PAT_ID == 5)
conn.execute(stmt)
mytab = sqlalchemy.Table(sql_text, metadata_obj, autoload_with=eng_clarity)
mytab2 = sqlalchemy.table(sql_text)
list(mytab2.columns)
#%% GAHHH I THINK IM CLOSE
# Declare the PAT_ID column on the raw SQL and wrap it as a subquery so a
# filter can be added without touching the SQL text.
# NOTE(review): SQL Server generally rejects ORDER BY inside a subquery unless
# TOP/OFFSET is present -- confirm before executing this statement.
sql_text = sqlalchemy.text("SELECT * FROM PAT_ENC ORDER BY PAT_ID")
stmt = sql_text.columns(sqlalchemy.sql.column('PAT_ID')).subquery()
# A subquery exposes its columns through the ``.c`` collection, not as plain
# attributes, so the filter must reference stmt.c.PAT_ID.
sqlalchemy.select(stmt).where(stmt.c.PAT_ID == 'Z3153680')
#sqlalchemy.sql.expression.TextualSelect(sql_text)
#%%
stmt2 = stmt.where(pat_enc.c.PAT_ID == 'Z3153680')
#does this work?!?!?!
conn.execute(stmt2) #NO it doesn't work... booo
#%% #can I add a where clause that WOULD have required me inserting in middle of SQL?
# The dream is that have my labs, or medications query ready...
# then I add whatever additional filters I want
# but it acts as fast as it would have been if I had the filter in the first place
#%%
#Can I take a SQL statement
#add a where clause
#BEFORE executing?
#begin with this statment (that would be super fucking slow)
"SELECT * FROM PAT_ENC ORDER BY PAT_ID"
#insert this where filter without having to fuck around with text
"WHERE '2021-10-05'< CONTACT_DATE AND CONTACT_DATE < '2021-10-05'"
#eventually substitute date parameters for those dates "bound parameters"
#eventually eventually add a filter for a list of patients
#maybe I can do this in the ORM?? OR MAYBE I MUST STICK WITH CORE??
#%%
sqlite_db_filepath = "C:\\Users\\LynchSe\\Documents\\Data\\database.db"
e = sqlalchemy.create_engine('sqlite:///{}'.format(sqlite_db_filepath))
conn = e.connect()
#%%
eng_clarity = ctc.get_clarity_engine()
conn_clarity = eng_clarity.connect()
#%%
metadata_obj = sqlalchemy.MetaData()
#%%
mytable = sqlalchemy.schema.Table("mytable", metadata_obj,
sqlalchemy.Column('mytable_id', sqlalchemy.Integer, primary_key=True),
sqlalchemy.Column('value', sqlalchemy.String(50))
)
mytable.name
#%% HOLY SHIT, I CAN READ INFO ABOUT A TABLE FROM CLARITY!!!
pat_enc = sqlalchemy.Table('PAT_ENC', metadata_obj, autoload_with=eng_clarity)
list(pat_enc.columns)
#%% Can I easily import a cohort into clarity?
cohort = sqlalchemy.schema.Table("##cohort", metadata_obj,
sqlalchemy.Column('PAT_ID', sqlalchemy.String(18), primary_key=True))
#%%
cohort.create(eng_clarity)
#%%
stmt = sqlalchemy.insert(cohort).values(PAT_ID='5931')
eng_clarity.execute(stmt)
#%%
rows = [{'PAT_ID':'1132'}, {'PAT_ID':'1133'}]
stmt = sqlalchemy.insert(cohort).values(rows)
eng_clarity.execute(stmt)
#TODO - test if this can handle a gazillion rows without being super slow
#%%
stmt = sqlalchemy.select(cohort)
res = eng_clarity.execute(stmt)
#%%
import pandas as pd
df = pd.read_sql(stmt, eng_clarity)
df2 = pd.read_sql(stmt.where(cohort.c.PAT_ID < 2000), eng_clarity)
df3 = pd.read_sql(stmt.where(cohort.c.PAT_ID.in_([5931, 1132])), eng_clarity)
#%% now how do I select from PAT_ENC based on a list of ids
#TODO - import into temp table and use join
#TODO - compare with IN clause?
import sqlalchemy
import clarity_to_csv as ctc
#from sqlalchemy import create_engine
sqlalchemy.__version__
#%%
sqlite_db_filepath = "C:\\Users\\LynchSe\\Documents\\Data\\database.db"
e = sqlalchemy.create_engine('sqlite:///{}'.format(sqlite_db_filepath))
conn = e.connect()
#%%
eng_clarity = ctc.get_clarity_engine()
conn_clarity = eng_clarity.connect()
#%%
metadata_obj = sqlalchemy.MetaData()
#%%
mytable = sqlalchemy.schema.Table("mytable", metadata_obj,
sqlalchemy.Column('mytable_id', sqlalchemy.Integer, primary_key=True),
sqlalchemy.Column('value', sqlalchemy.String(50))
)
mytable.name
#%% HOLY SHIT, I CAN READ INFO ABOUT A TABLE FROM CLARITY!!!
pat_enc = sqlalchemy.Table('PAT_ENC', metadata_obj, autoload_with=eng_clarity)
list(pat_enc.columns)
#%% Can I easily import a cohort into clarity?
cohort = sqlalchemy.schema.Table("##cohort", metadata_obj,
sqlalchemy.Column('PAT_ID', sqlalchemy.String(18), primary_key=True))
#%%
cohort.create(eng_clarity)
#%%
stmt = sqlalchemy.insert(cohort).values(PAT_ID='5931')
eng_clarity.execute(stmt)
#%%
rows = [{'PAT_ID':'1132'}, {'PAT_ID':'1133'}]
stmt = sqlalchemy.insert(cohort).values(rows)
eng_clarity.execute(stmt)
#TODO - test if this can handle a gazillion rows without being super slow
#%%
stmt = sqlalchemy.select(cohort)
res = eng_clarity.execute(stmt)
#%%
import pandas as pd
df = pd.read_sql(stmt, eng_clarity)
df2 = pd.read_sql(stmt.where(cohort.c.PAT_ID < 2000), eng_clarity)
df3 = pd.read_sql(stmt.where(cohort.c.PAT_ID.in_([5931, 1132])), eng_clarity)
#%% now how do I select from PAT_ENC based on a list of ids
#TODO - import into temp table and use join
#TODO - compare with IN clause?
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!