Skip to content
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation
This project
Loading...
Sign in
cicTeam
/
Clarity_Tools_Selah
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit 643d13e5
authored
Sep 24, 2021
by
Selah Clarity
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
sql to df function, sqlalchemy play
1 parent
73c201b8
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
120 additions
and
8 deletions
clarity_tests.py
clarity_to_csv.py
clarity_to_csv_test.py
column_type.sql
sqlalchemy_play.py
clarity_tests.py
View file @
643d13e
...
@@ -32,6 +32,9 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
...
@@ -32,6 +32,9 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
class
TestStuff
(
unittest
.
TestCase
):
class
TestStuff
(
unittest
.
TestCase
):
def test_clarity_dev_connection(self):
    # Builds an engine for the dev host and stops there: nothing is executed
    # and nothing is asserted, so this only fails if get_clarity_engine raises.
    # NOTE(review): whether building the engine actually opens a connection
    # depends on get_clarity_engine, which is not visible here -- confirm.
    eng = get_clarity_engine(host='claritydev.uphs.upenn.edu')
#Test a basic connect and execute
#Test a basic connect and execute
def
test_basic_conn_execute
(
self
):
def
test_basic_conn_execute
(
self
):
eng
=
get_clarity_engine
()
eng
=
get_clarity_engine
()
...
@@ -116,6 +119,7 @@ if __name__ == '__main__':
...
@@ -116,6 +119,7 @@ if __name__ == '__main__':
# unittest.main()
# unittest.main()
t
=
TestStuff
()
t
=
TestStuff
()
t
.
test_clarity_dev_connection
()
t
.
test_basic_conn_execute
()
t
.
test_basic_conn_execute
()
t
.
test_dev_conn_execute
()
t
.
test_dev_conn_execute
()
t
.
test_temp_table_persistence
()
t
.
test_temp_table_persistence
()
...
...
clarity_to_csv.py
View file @
643d13e
...
@@ -35,6 +35,31 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
...
@@ -35,6 +35,31 @@ def get_clarity_engine(credsfilename = selahcredsfilename, timeout=600, host='cl
return
clarity_engine
return
clarity_engine
def sqltext_to_dfs(sqltext, dbconn):
    """Execute every statement in *sqltext* and collect the tabular results.

    The text is split into individual statements with
    ``extract_sql_statements`` and each statement is executed, in order, on
    *dbconn*.  Statements whose result reports ``returns_rows`` as false are
    executed but contribute no DataFrame.

    Args:
        sqltext: SQL source text holding one or more statements.
        dbconn: Object whose ``execute(stmt)`` returns a result exposing
            ``returns_rows``, ``keys()`` and row iteration (presumably a
            SQLAlchemy connection -- confirm against callers).

    Returns:
        A tuple of ``pandas.DataFrame`` objects, one per row-returning
        statement, in execution order.
    """
    dfs = []
    for stmt in extract_sql_statements(sqltext):
        res = dbconn.execute(stmt)
        if not res.returns_rows:
            # Nothing to tabulate for this statement; move on to the next one.
            continue
        columns = list(res.keys())
        # NOTE: the original author flagged this row-collecting logic as
        # repeated elsewhere.
        values = [list(row) for row in res]
        dfs.append(pd.DataFrame(data=values, columns=columns))
    return tuple(dfs)
def sqlfile_to_dfs(sqlfilename, dbconn):
    """Read the SQL text stored in *sqlfilename* and run it via ``sqltext_to_dfs``.

    Args:
        sqlfilename: Path of the file holding the SQL text.
        dbconn: Passed through unchanged to ``sqltext_to_dfs``.

    Returns:
        Whatever ``sqltext_to_dfs`` returns for the file's contents.
    """
    with open(sqlfilename, 'r') as handle:
        query_text = handle.read()
    return sqltext_to_dfs(query_text, dbconn)
def
clarity_to_csv
(
sqlfilename
,
csvfilenames
,
dbconn
=
None
):
def
clarity_to_csv
(
sqlfilename
,
csvfilenames
,
dbconn
=
None
):
print
(
"Running SQL from {}"
.
format
(
sqlfilename
))
print
(
"Running SQL from {}"
.
format
(
sqlfilename
))
import
time
import
time
...
@@ -53,14 +78,19 @@ def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
...
@@ -53,14 +78,19 @@ def clarity_to_csv(sqlfilename, csvfilenames, dbconn=None):
print
(
"Query ran and exported {} in {:.1f} s"
.
format
(
dtstr
,
duration
))
print
(
"Query ran and exported {} in {:.1f} s"
.
format
(
dtstr
,
duration
))
def
extract_sql_statements
(
sqltext
):
def
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
sqalconn
,
verbose
=
False
):
import
sqlparse
import
sqlparse
sqltext_cleaned
=
sqlparse
.
format
(
sqltext
,
strip_comments
=
True
)
.
strip
()
sqltext_cleaned
=
sqlparse
.
format
(
sqltext
,
strip_comments
=
True
)
.
strip
()
sqlstatements
=
sqltext_cleaned
.
split
(
';'
)
sqlstatements
=
sqltext_cleaned
.
split
(
';'
)
sqlstatements
=
[
stmt
.
strip
()
for
stmt
in
sqlstatements
]
if
sqlstatements
[
-
1
]
.
strip
()
==
''
:
if
sqlstatements
[
-
1
]
.
strip
()
==
''
:
sqlstatements
.
pop
()
# often there is a final semicolon leading to a empty last statement
sqlstatements
.
pop
()
# often there is a final semicolon leading to a empty last statement
return
sqlstatements
def
clarity_to_csv_inner
(
sqltext
,
csvfilenames
,
sqalconn
,
verbose
=
False
):
sqlstatements
=
extract_sql_statements
(
sqltext
)
which_statement
=
0
which_statement
=
0
which_csvfile
=
0
which_csvfile
=
0
for
sqlstatement
in
sqlstatements
:
for
sqlstatement
in
sqlstatements
:
...
@@ -77,7 +107,7 @@ def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
...
@@ -77,7 +107,7 @@ def clarity_to_csv_inner(sqltext, csvfilenames, sqalconn, verbose=False):
with
open
(
csvname
,
'w'
,
newline
=
'
\n
'
,
encoding
=
'utf-8'
)
as
csvfile
:
with
open
(
csvname
,
'w'
,
newline
=
'
\n
'
,
encoding
=
'utf-8'
)
as
csvfile
:
line_count
=
0
line_count
=
0
mycsvwriter
=
csv
.
writer
(
csvfile
,
delimiter
=
','
,
quotechar
=
'"'
,
quoting
=
csv
.
QUOTE_MINIMAL
)
mycsvwriter
=
csv
.
writer
(
csvfile
,
delimiter
=
','
,
quotechar
=
'"'
,
quoting
=
csv
.
QUOTE_MINIMAL
)
#
TODO -
write column names
#write column names
mycsvwriter
.
writerow
(
results
.
keys
())
mycsvwriter
.
writerow
(
results
.
keys
())
line_count
+=
1
line_count
+=
1
for
row
in
results
:
for
row
in
results
:
...
...
clarity_to_csv_test.py
View file @
643d13e
...
@@ -50,6 +50,21 @@ class TestStuff(unittest.TestCase):
...
@@ -50,6 +50,21 @@ class TestStuff(unittest.TestCase):
ctc
.
clarity_to_csv
(
sqlfilename1
,
genericcsvs
,
dbconn
=
sqalconn
)
ctc
.
clarity_to_csv
(
sqlfilename1
,
genericcsvs
,
dbconn
=
sqalconn
)
ctc
.
clarity_to_csv
(
sqlfilename2
,
genericcsvs
,
dbconn
=
sqalconn
)
ctc
.
clarity_to_csv
(
sqlfilename2
,
genericcsvs
,
dbconn
=
sqalconn
)
def test_save_to_dataframes(self):
    """Run testCohort.sql through ``sqlfile_to_dfs`` and check both row counts."""
    cohort_sql_path = testquerydir + "testCohort.sql"
    engine = ctc.get_clarity_engine()
    with engine.connect() as sqalconn:
        first_frame, second_frame = ctc.sqlfile_to_dfs(cohort_sql_path, sqalconn)
    # The query file is expected to yield exactly two result sets.
    self.assertEqual(len(first_frame), 3)
    self.assertEqual(len(second_frame), 2)
def test_save_to_df_2col(self):
    """Run two inline SELECTs through ``sqltext_to_dfs`` and check both row counts."""
    two_query_text = '''SELECT TOP 10 PAT_ID, PAT_ENC_CSN_ID FROM PAT_ENC;
SELECT TOP 5 CONTACT_DATE FROM PAT_ENC tablesample(0.01);
'''
    engine = ctc.get_clarity_engine()
    with engine.connect() as sqalconn:
        wide_frame, date_frame = ctc.sqltext_to_dfs(two_query_text, sqalconn)
    # One frame per SELECT: TOP 10 for the first, TOP 5 for the second.
    self.assertEqual(len(wide_frame), 10)
    self.assertEqual(len(date_frame), 5)
def
test_comment_with_semicolon
(
self
):
def
test_comment_with_semicolon
(
self
):
...
@@ -78,8 +93,6 @@ class TestStuff(unittest.TestCase):
...
@@ -78,8 +93,6 @@ class TestStuff(unittest.TestCase):
ctc
.
clarity_to_csv
(
sqlfilename
,
genericcsvs
,
dbconn
=
sqalconn
)
ctc
.
clarity_to_csv
(
sqlfilename
,
genericcsvs
,
dbconn
=
sqalconn
)
def
test_unicode_error
(
self
):
def
test_unicode_error
(
self
):
genericcsvs
=
[
genericcsvs
=
[
testdatadir
+
'test_cohort.csv'
testdatadir
+
'test_cohort.csv'
...
@@ -147,8 +160,10 @@ class TestStuff(unittest.TestCase):
...
@@ -147,8 +160,10 @@ class TestStuff(unittest.TestCase):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestStuff
()
t
=
TestStuff
()
t
.
test_save_to_dataframes
()
t
.
test_save_to_df_2col
()
# t.test_remove_file_not_there()
# t.test_remove_file_not_there()
t
.
test_integration_test
()
#
t.test_integration_test()
# t.test_comment_with_semicolon()
# t.test_comment_with_semicolon()
# t.test_none_csv()
# t.test_none_csv()
# t.test_unicode_error()
# t.test_unicode_error()
...
@@ -158,7 +173,7 @@ if __name__ == '__main__':
...
@@ -158,7 +173,7 @@ if __name__ == '__main__':
# t.test_cohort()
# t.test_cohort()
#unittest.main()
#
unittest.main()
...
...
column_type.sql
0 → 100644
View file @
643d13e
-- List every row that INFORMATION_SCHEMA.COLUMNS holds for the
-- CLARITY_MEDICATION table (one row per column of that table).
select * from INFORMATION_SCHEMA.COLUMNS where table_name = 'CLARITY_MEDICATION'
\ No newline at end of file
\ No newline at end of file
sqlalchemy_play.py
0 → 100644
View file @
643d13e
import
sqlalchemy
import
clarity_to_csv
as
ctc
# Exploratory, cell-based (#%%) script: cells are meant to be run one at a time
# and in order, since later cells rely on objects and tables created by earlier ones.

#from sqlalchemy import create_engine
# Bare expression: its value is only shown when the cell is run interactively.
sqlalchemy.__version__

#%% Connect to a local SQLite database file
sqlite_db_filepath = "C:\\Users\\LynchSe\\Documents\\Data\\database.db"
e = sqlalchemy.create_engine('sqlite:///{}'.format(sqlite_db_filepath))
# NOTE(review): `conn` is not used again in this script and is never closed.
conn = e.connect()

#%% Connect to Clarity through the project's engine factory
eng_clarity = ctc.get_clarity_engine()
# NOTE(review): `conn_clarity` is not used again in this script and is never
# closed; every later call goes through `eng_clarity` directly.
conn_clarity = eng_clarity.connect()

#%% Shared MetaData object that the Table definitions below register with
metadata_obj = sqlalchemy.MetaData()

#%% Declare a table in Python only (it is never created in a database here)
mytable = sqlalchemy.schema.Table("mytable", metadata_obj,
    sqlalchemy.Column('mytable_id', sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column('value', sqlalchemy.String(50))
    )
# Bare expression: shows the table name when run interactively.
mytable.name

#%% Reflect an existing Clarity table: autoload_with reads PAT_ENC's definition from the database
pat_enc = sqlalchemy.Table('PAT_ENC', metadata_obj, autoload_with=eng_clarity)
# Bare expression: shows the reflected columns when run interactively.
list(pat_enc.columns)

#%% Can I easily import a cohort into clarity?
# Presumably the "##" prefix makes this a SQL Server global temp table -- confirm.
cohort = sqlalchemy.schema.Table("##cohort", metadata_obj,
    sqlalchemy.Column('PAT_ID', sqlalchemy.String(18), primary_key=True))

#%% Issue the CREATE TABLE for the cohort table on the Clarity engine
cohort.create(eng_clarity)

#%% Insert a single row
stmt = sqlalchemy.insert(cohort).values(PAT_ID='5931')
# NOTE(review): Engine.execute() is legacy API that SQLAlchemy 2.0 removed;
# confirm which SQLAlchemy version this script is expected to run on.
eng_clarity.execute(stmt)

#%% Insert several rows with one statement
rows = [{'PAT_ID':'1132'},{'PAT_ID':'1133'}]
stmt = sqlalchemy.insert(cohort).values(rows)
eng_clarity.execute(stmt)
#TODO - test if this can handle a gazillion rows without being super slow

#%% Select everything back from the cohort table
stmt = sqlalchemy.select(cohort)
res = eng_clarity.execute(stmt)

#%% Read the same select (and two filtered variants) into DataFrames
import pandas as pd
df = pd.read_sql(stmt, eng_clarity)
# NOTE(review): PAT_ID is declared String(18) above but is compared with integer
# literals in the next two statements; the outcome depends on the database's
# implicit conversion -- confirm this is intended.
df2 = pd.read_sql(stmt.where(cohort.c.PAT_ID < 2000), eng_clarity)
df3 = pd.read_sql(stmt.where(cohort.c.PAT_ID.in_([5931, 1132])), eng_clarity)

#%% now how do I select from PAT_ENC based on a list of ids
#TODO - import into temp table and use join
#TODO - compare with IN clause?
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment