sql_to_schema.py
2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import numpy as np
import sqlparse
def filter_whitespace(tokenlist):
tokenlistfilt = []
for token in tokenlist:
if (not(token.ttype == sqlparse.tokens.Whitespace) or (token.ttype == sqlparse.tokens.Whitespace.Newline)):
tokenlistfilt.append(token)
return tokenlistfilt
def extract_tables(sql):
tables = []
sqlp = sqlparse.parse(sql)
tks = sqlp[0].tokens
tksf = filter_whitespace(tks)
cursor = 0; chunk = tksf[cursor]
state = "start"
while(state != 'done'):
if state == "start":
assert chunk.value == 'SELECT'
cursor += 1; chunk = tksf[cursor]
assert type(chunk) in [sqlparse.sql.IdentifierList, sqlparse.sql.Identifier]
cursor += 1; chunk = tksf[cursor]
assert chunk.value == 'FROM'
cursor += 1; chunk = tksf[cursor]
state = "afterFROM"
if state == "afterFROM":
assert type(chunk) == sqlparse.sql.Identifier
tables.append(chunk.value)
cursor += 1;
if cursor >= len(tksf):
state = "done"
else:
state = "afterIdentifier"
chunk = tksf[cursor]
if state == "afterIdentifier":
chunk = tksf[cursor]
assert chunk.value in ["INNER JOIN", "LEFT OUTER JOIN"]
cursor += 1; chunk = tksf[cursor]
assert type(chunk) == sqlparse.sql.Identifier
tables.append(chunk.value)
cursor += 1; chunk = tksf[cursor]
assert chunk.value == 'ON'
cursor += 1; chunk = tksf[cursor]
assert type(chunk) == sqlparse.sql.Comparison
cursor += 1;
if cursor >= len(tksf):
state = "done"
else:
state = "afterFROM"
tables2 = []
for table in tables:
(tname, talias) = table.split(' ')
tables2.append({'name':tname,'alias':talias})
return tables2
#%% for experimenting
if __name__ == '__main__':
pass