sqlalchemy_play.py
1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sqlalchemy
import clarity_to_csv as ctc
#from sqlalchemy import create_engine
sqlalchemy.__version__
#%%
sqlite_db_filepath = "C:\\Users\\LynchSe\\Documents\\Data\\database.db"
e = sqlalchemy.create_engine('sqlite:///{}'.format(sqlite_db_filepath))
conn = e.connect()
#%%
eng_clarity = ctc.get_clarity_engine()
conn_clarity = eng_clarity.connect()
#%%
metadata_obj = sqlalchemy.MetaData()
#%%
mytable = sqlalchemy.schema.Table("mytable", metadata_obj,
sqlalchemy.Column('mytable_id', sqlalchemy.Integer, primary_key=True),
sqlalchemy.Column('value', sqlalchemy.String(50))
)
mytable.name
#%% HOLY SHIT, I CAN READ INFO ABOUT A TABLE FROM CLARITY!!!
pat_enc = sqlalchemy.Table('PAT_ENC', metadata_obj, autoload_with=eng_clarity)
list(pat_enc.columns)
#%% Can I easily import a cohort into clarity?
cohort = sqlalchemy.schema.Table("##cohort", metadata_obj,
sqlalchemy.Column('PAT_ID', sqlalchemy.String(18), primary_key=True))
#%%
cohort.create(eng_clarity)
#%%
stmt = sqlalchemy.insert(cohort).values(PAT_ID='5931')
eng_clarity.execute(stmt)
#%%
rows = [{'PAT_ID':'1132'}, {'PAT_ID':'1133'}]
stmt = sqlalchemy.insert(cohort).values(rows)
eng_clarity.execute(stmt)
#TODO - test if this can handle a gazillion rows without being super slow
#%%
stmt = sqlalchemy.select(cohort)
res = eng_clarity.execute(stmt)
#%%
import pandas as pd
df = pd.read_sql(stmt, eng_clarity)
df2 = pd.read_sql(stmt.where(cohort.c.PAT_ID < 2000), eng_clarity)
df3 = pd.read_sql(stmt.where(cohort.c.PAT_ID.in_([5931, 1132])), eng_clarity)
#%% now how do I select from PAT_ENC based on a list of ids
#TODO - import into temp table and use join
#TODO - compare with IN clause?