170 lines
4.0 KiB
Python
170 lines
4.0 KiB
Python
|
import sqlparse
|
|||
|
import sqlglot
|
|||
|
from sqlglot.expressions import ColumnDef
|
|||
|
|
|||
|
|
|||
|
def extract_create_table(sql_script):
    """Return the CREATE TABLE statements contained in ``sql_script``.

    The script is split into statements with ``sqlparse.parse``. Each
    statement is re-rendered through ``sqlparse.format`` with
    ``strip_comments=True``, ``reindent=False`` and ``keyword_case="lower"``,
    and only those whose text (ignoring surrounding whitespace and letter
    case) begins with ``CREATE TABLE`` are kept.

    Args:
        sql_script: SQL text that may contain several statements.

    Returns:
        The kept statements joined with newlines; an empty string when no
        statement matches.
    """

    def _rendered(statements):
        # Re-render every parsed statement without comments and without
        # re-indenting it.
        for stmt in statements:
            yield sqlparse.format(
                stmt.value,
                strip_comments=True,
                reindent=False,
                keyword_case="lower",
            )

    # A statement that is empty after comment stripping can never start with
    # the prefix, so it is dropped by the same test. Change the prefix here
    # to match other statement types.
    kept = [
        text
        for text in _rendered(sqlparse.parse(sql_script))
        if text.upper().strip().startswith("CREATE TABLE")
    ]
    return "\n".join(kept)
|
|||
|
|
|||
|
|
|||
|
# Raw SQL script used as the sample input for extract_create_table() below
|
|||
|
sql_script = """
|
|||
|
BEGIN;
|
|||
|
|
|||
|
/*
|
|||
|
DROP TABLE ods.track_log_002;
|
|||
|
*/
|
|||
|
|
|||
|
-- Type: TABLE ; Name: track_log_002; Owner: sdk_statis_developer
|
|||
|
|
|||
|
CREATE TABLE ods.track_log_002 (
|
|||
|
appid bigint NOT NULL,
|
|||
|
app_ver text,
|
|||
|
sdk_ver text,
|
|||
|
channel text,
|
|||
|
country text,
|
|||
|
province text,
|
|||
|
city text,
|
|||
|
isp text,
|
|||
|
ip text,
|
|||
|
device_width integer,
|
|||
|
device_height integer,
|
|||
|
device_id text NOT NULL,
|
|||
|
device_lang text,
|
|||
|
device_model text,
|
|||
|
device_brand text,
|
|||
|
device_os text,
|
|||
|
device_type text,
|
|||
|
event_name text NOT NULL,
|
|||
|
event_type text,
|
|||
|
event_time bigint NOT NULL,
|
|||
|
net_type text,
|
|||
|
user_id text,
|
|||
|
order_id text,
|
|||
|
amount bigint,
|
|||
|
platform text,
|
|||
|
status integer,
|
|||
|
servid text,
|
|||
|
server_name text,
|
|||
|
role_id text,
|
|||
|
role_name text,
|
|||
|
role_level text,
|
|||
|
job_id text,
|
|||
|
job_name text,
|
|||
|
var1 text,
|
|||
|
var2 text,
|
|||
|
var3 text,
|
|||
|
var4 text,
|
|||
|
var5 text,
|
|||
|
var6 text,
|
|||
|
var7 text,
|
|||
|
var8 text,
|
|||
|
var9 text,
|
|||
|
var10 text,
|
|||
|
var11 text,
|
|||
|
var12 text,
|
|||
|
var13 text,
|
|||
|
var14 text,
|
|||
|
var15 text,
|
|||
|
var16 text,
|
|||
|
var17 text,
|
|||
|
var18 text,
|
|||
|
var19 text,
|
|||
|
var20 text,
|
|||
|
var21 text,
|
|||
|
var22 text,
|
|||
|
var23 text,
|
|||
|
var24 text,
|
|||
|
var25 text,
|
|||
|
var26 text,
|
|||
|
var27 text,
|
|||
|
var28 text,
|
|||
|
var29 text,
|
|||
|
var30 text,
|
|||
|
ds text NOT NULL,
|
|||
|
prodid text,
|
|||
|
prod_name text,
|
|||
|
sub_servid text,
|
|||
|
sub_server_name text
|
|||
|
)
|
|||
|
PARTITION BY LIST (ds)with (
|
|||
|
orientation = 'column',
|
|||
|
storage_format = 'orc',
|
|||
|
auto_partitioning_enable = 'true',
|
|||
|
auto_partitioning_num_hot = '90',
|
|||
|
auto_partitioning_num_precreate = '2',
|
|||
|
auto_partitioning_num_retention = '191',
|
|||
|
auto_partitioning_schd_start_time = '1970-01-01 00:00:00',
|
|||
|
auto_partitioning_time_format = '',
|
|||
|
auto_partitioning_time_unit = 'day',
|
|||
|
auto_partitioning_time_zone = 'PRC',
|
|||
|
bitmap_columns = 'appid,event_name,ds,role_id,device_id,servid,user_id,country,channel,province,status,city,device_width,var4,var3,var2,var1,amount,device_height,var12,var13,var14,var15,var10,var11,var9,var8,var7,var6,var5,event_time',
|
|||
|
clustering_key = 'appid:asc',
|
|||
|
dictionary_encoding_columns = '',
|
|||
|
segment_key = 'event_time',
|
|||
|
table_group = 'sdk_statis_tg_s80',
|
|||
|
table_storage_mode = 'hot',
|
|||
|
time_to_live_in_seconds = '16416000'
|
|||
|
);
|
|||
|
|
|||
|
|
|||
|
|
|||
|
COMMENT ON TABLE ods.track_log_002 IS NULL;
|
|||
|
ALTER TABLE ods.track_log_002 OWNER TO sdk_statis_developer;
|
|||
|
|
|||
|
|
|||
|
END;
|
|||
|
"""
|
|||
|
|
|||
|
# Run the extraction: keep only the CREATE TABLE statement(s) of the script.
result = extract_create_table(sql_script)

# Rewrite the DDL from PostgreSQL syntax to Hive syntax. transpile() returns
# a list of statements; only the first one is used.
re_create_table_sql = sqlglot.transpile(result, read="postgres", write="hive")[0]

# Parse the Hive DDL again so the table and its columns can be inspected.
parsed = sqlglot.parse_one(re_create_table_sql, read='hive')

# Table name, read two levels below the root of the parsed statement.
table_name = parsed.this.this

columns = []
# find_all() yields the ColumnDef nodes themselves. The earlier
# ``for expression in parsed.walk(): expression[0]`` form relied on walk()
# yielding (node, parent, key) tuples, which is only true for older sqlglot
# releases and fails on releases where walk() yields bare nodes.
for column_def in parsed.find_all(ColumnDef):
    # Column name
    column_name = column_def.this.this
    # Data type name, upper-cased
    column_type = column_def.args['kind'].this.name.upper()
    # Report TEXT columns as STRING
    if column_type == 'TEXT':
        column_type = 'STRING'
    columns.append({'name': column_name, 'type': column_type})

# Print the table name
print(f"表名称: {table_name}")

# Print one line per column
for column in columns:
    print(f"字段名称: {column['name']}, 字段类型: {column['type']}")
|