import sqlparse
import sqlglot
from sqlglot.expressions import ColumnDef

# Statement prefixes (compared case-insensitively) that extract_create_table keeps.
# Add more entries here to match other statement types.
_KEPT_STATEMENT_PREFIXES = ("CREATE TABLE",)


def extract_create_table(sql_script):
    """Return only the CREATE TABLE statements found in *sql_script*.

    The script is split into statements with ``sqlparse.parse``. Each
    statement is re-emitted through ``sqlparse.format`` with comments stripped
    and keywords lower-cased, so the returned text is not byte-identical to
    the input. Statements that are empty after stripping are skipped.

    Args:
        sql_script: SQL text that may contain several statements.

    Returns:
        The kept statements joined with a newline, or an empty string when
        no statement starts with one of ``_KEPT_STATEMENT_PREFIXES``.
    """
    create_table_statements = []

    for statement in sqlparse.parse(sql_script):
        # No re-indentation; only comments are removed and keyword case changed.
        stripped = sqlparse.format(
            statement.value,
            strip_comments=True,
            reindent=False,
            keyword_case="lower",
        )

        normalized = stripped.strip()
        if not normalized:
            # Nothing left once comments are stripped (e.g. a comment-only chunk).
            continue

        # Upper-case before comparing because the formatter lower-cased keywords.
        if normalized.upper().startswith(_KEPT_STATEMENT_PREFIXES):
            create_table_statements.append(stripped)

    return "\n".join(create_table_statements)


# Sample SQL script used by the demo below.
sql_script = """
BEGIN;

/*
DROP TABLE ods.track_log_002;
*/

-- Type: TABLE ; Name: track_log_002; Owner: sdk_statis_developer

CREATE TABLE ods.track_log_002 (
    appid bigint NOT NULL,
    app_ver text,
    sdk_ver text,
    channel text,
    country text,
    province text,
    city text,
    isp text,
    ip text,
    device_width integer,
    device_height integer,
    device_id text NOT NULL,
    device_lang text,
    device_model text,
    device_brand text,
    device_os text,
    device_type text,
    event_name text NOT NULL,
    event_type text,
    event_time bigint NOT NULL,
    net_type text,
    user_id text,
    order_id text,
    amount bigint,
    platform text,
    status integer,
    servid text,
    server_name text,
    role_id text,
    role_name text,
    role_level text,
    job_id text,
    job_name text,
    var1 text,
    var2 text,
    var3 text,
    var4 text,
    var5 text,
    var6 text,
    var7 text,
    var8 text,
    var9 text,
    var10 text,
    var11 text,
    var12 text,
    var13 text,
    var14 text,
    var15 text,
    var16 text,
    var17 text,
    var18 text,
    var19 text,
    var20 text,
    var21 text,
    var22 text,
    var23 text,
    var24 text,
    var25 text,
    var26 text,
    var27 text,
    var28 text,
    var29 text,
    var30 text,
    ds text NOT NULL,
    prodid text,
    prod_name text,
    sub_servid text,
    sub_server_name text
)
 PARTITION BY LIST (ds)with (
orientation = 'column',
storage_format = 'orc',
auto_partitioning_enable = 'true',
auto_partitioning_num_hot = '90',
auto_partitioning_num_precreate = '2',
auto_partitioning_num_retention = '191',
auto_partitioning_schd_start_time = '1970-01-01 00:00:00',
auto_partitioning_time_format = '',
auto_partitioning_time_unit = 'day',
auto_partitioning_time_zone = 'PRC',
bitmap_columns = 'appid,event_name,ds,role_id,device_id,servid,user_id,country,channel,province,status,city,device_width,var4,var3,var2,var1,amount,device_height,var12,var13,var14,var15,var10,var11,var9,var8,var7,var6,var5,event_time',
clustering_key = 'appid:asc',
dictionary_encoding_columns = '',
segment_key = 'event_time',
table_group = 'sdk_statis_tg_s80',
table_storage_mode = 'hot',
time_to_live_in_seconds = '16416000'
);



COMMENT ON TABLE ods.track_log_002 IS NULL;
ALTER TABLE ods.track_log_002 OWNER TO sdk_statis_developer;


END;
"""

# Keep only the CREATE TABLE statement(s) from the script.
result = extract_create_table(sql_script)

# Transpile from the Postgres dialect to Hive; only the first output statement is used.
re_create_table_sql = sqlglot.transpile(result, read="postgres", write="hive")[0]

parsed = sqlglot.parse_one(re_create_table_sql, read='hive')

# Table reference of the CREATE statement.
# NOTE(review): assumes parsed.this wraps the table (a schema/column-list node) - confirm.
table_name = parsed.this.this

columns = []
# find_all yields the ColumnDef nodes themselves, so this loop does not depend
# on the shape of the items produced by Expression.walk().
for column_def in parsed.find_all(ColumnDef):
    # Column name.
    column_name = column_def.this.this
    # Data type name, upper-cased.
    column_type = column_def.args['kind'].this.name.upper()
    # Report TEXT columns as STRING.
    if column_type == 'TEXT':
        column_type = 'STRING'
    columns.append({'name': column_name, 'type': column_type})

# Print the table name, then one line per column.
print(f"表名称: {table_name}")

for column in columns:
    print(f"字段名称: {column['name']}, 字段类型: {column['type']}")
import os

from flask import Flask, render_template, request, jsonify
from utils.sql_parse import parse_create_table_sql
from utils.log import Log


app = Flask(__name__)


@app.route('/')
def index():
    """Render the ``index.html`` template."""
    return render_template('index.html')


@app.route('/convert', methods=['POST'])
def convert_sql():
    """Handle a POSTed form by passing its SQL to ``parse_create_table_sql``.

    Reads the ``sql`` and ``hologresConnection`` form fields, logs both,
    and calls ``parse_create_table_sql`` with them.

    Returns:
        A JSON response. On success it carries ``target_tables`` (the parser
        result) and ``message``; if the parser raises, it carries ``error``
        with the exception text instead.
    """
    # A Log instance is created per request; per the original author's note this
    # is meant to roll over to a new log file each day - confirm in utils.log.
    log = Log().getlog()
    sql_input = request.form['sql']
    hologres_connection = request.form['hologresConnection']
    log.info("SQL Input: %s", sql_input)
    # NOTE(review): if hologresConnection can contain credentials, this line
    # writes them to the log - confirm what the form field holds.
    log.info("SQL hologres_connection: %s", hologres_connection)
    try:
        # Only the parser call is guarded, so the handler below cannot mask
        # failures from unrelated code.
        parsed_result = parse_create_table_sql(sql_input, hologres_connection)
    except Exception as e:
        # Request boundary: report the failure to the client as JSON
        # rather than letting it propagate.
        result = {'error': str(e)}
    else:
        result = {
            'target_tables': parsed_result,
            'message': 'SQL processed successfully.'
        }
    log.info("SQL result: %s", result)
    return jsonify(result)


if __name__ == '__main__':
    # Binding to 0.0.0.0 makes the server reachable from other hosts, so the
    # interactive debugger must stay off unless explicitly requested.
    # Set FLASK_DEBUG=1 (or true/yes/on) to enable it for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(host='0.0.0.0', port=8778, debug=debug_enabled)
+ +