第一次提交

This commit is contained in:
尹舟 2025-02-05 14:18:02 +08:00
commit 812877f45d
11 changed files with 616 additions and 0 deletions

4
.dockerignore Normal file
View File

@ -0,0 +1,4 @@
.venv/
.idea/
.deploy/
logs/

8
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,8 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

169
demo.py Normal file
View File

@ -0,0 +1,169 @@
import sqlparse
import sqlglot
from sqlglot.expressions import ColumnDef
def extract_create_table(sql_script):
    """Return only the CREATE TABLE statements contained in *sql_script*.

    The script is split into statements with ``sqlparse``. Each statement is
    re-emitted with comments stripped and keywords lower-cased (no
    re-indentation), and kept when its text begins with ``CREATE TABLE``
    (compared case-insensitively, ignoring surrounding whitespace).

    Returns:
        The kept statements joined by newlines; an empty string if none match.
    """
    def _normalized(statement):
        # Only comments and keyword case are touched; layout is left alone.
        return sqlparse.format(
            statement.value,
            strip_comments=True,
            reindent=False,
            keyword_case="lower",
        )

    kept = []
    for cleaned in map(_normalized, sqlparse.parse(sql_script)):
        # A blank statement yields an empty head and can never match.
        head = cleaned.upper().strip()
        if head.startswith("CREATE TABLE"):
            kept.append(cleaned)
    return "\n".join(kept)
# Sample input: the raw SQL script (a table DDL export) used by the demo below
sql_script = """
BEGIN;
/*
DROP TABLE ods.track_log_002;
*/
-- Type: TABLE ; Name: track_log_002; Owner: sdk_statis_developer
CREATE TABLE ods.track_log_002 (
appid bigint NOT NULL,
app_ver text,
sdk_ver text,
channel text,
country text,
province text,
city text,
isp text,
ip text,
device_width integer,
device_height integer,
device_id text NOT NULL,
device_lang text,
device_model text,
device_brand text,
device_os text,
device_type text,
event_name text NOT NULL,
event_type text,
event_time bigint NOT NULL,
net_type text,
user_id text,
order_id text,
amount bigint,
platform text,
status integer,
servid text,
server_name text,
role_id text,
role_name text,
role_level text,
job_id text,
job_name text,
var1 text,
var2 text,
var3 text,
var4 text,
var5 text,
var6 text,
var7 text,
var8 text,
var9 text,
var10 text,
var11 text,
var12 text,
var13 text,
var14 text,
var15 text,
var16 text,
var17 text,
var18 text,
var19 text,
var20 text,
var21 text,
var22 text,
var23 text,
var24 text,
var25 text,
var26 text,
var27 text,
var28 text,
var29 text,
var30 text,
ds text NOT NULL,
prodid text,
prod_name text,
sub_servid text,
sub_server_name text
)
PARTITION BY LIST (ds)with (
orientation = 'column',
storage_format = 'orc',
auto_partitioning_enable = 'true',
auto_partitioning_num_hot = '90',
auto_partitioning_num_precreate = '2',
auto_partitioning_num_retention = '191',
auto_partitioning_schd_start_time = '1970-01-01 00:00:00',
auto_partitioning_time_format = '',
auto_partitioning_time_unit = 'day',
auto_partitioning_time_zone = 'PRC',
bitmap_columns = 'appid,event_name,ds,role_id,device_id,servid,user_id,country,channel,province,status,city,device_width,var4,var3,var2,var1,amount,device_height,var12,var13,var14,var15,var10,var11,var9,var8,var7,var6,var5,event_time',
clustering_key = 'appid:asc',
dictionary_encoding_columns = '',
segment_key = 'event_time',
table_group = 'sdk_statis_tg_s80',
table_storage_mode = 'hot',
time_to_live_in_seconds = '16416000'
);
COMMENT ON TABLE ods.track_log_002 IS NULL;
ALTER TABLE ods.track_log_002 OWNER TO sdk_statis_developer;
END;
"""
# Run the demo: isolate the CREATE TABLE statement, convert it from the
# PostgreSQL dialect to Hive, then re-parse the Hive text for inspection.
result = extract_create_table(sql_script)
re_create_table_sql = sqlglot.transpile(result, read="postgres", write="hive")[0]
parsed = sqlglot.parse_one(re_create_table_sql, read='hive')
# Table reference of the CREATE statement.
table_name = parsed.this.this
columns = []
# find_all() yields the ColumnDef nodes themselves. Indexing the items produced
# by walk() (the previous approach) only works on sqlglot releases where
# walk() yields tuples.
for column_def in parsed.find_all(ColumnDef):
    column_type = column_def.args['kind'].this.name.upper()
    # Report TEXT columns as STRING.
    if column_type == 'TEXT':
        column_type = 'STRING'
    columns.append({'name': column_def.this.this, 'type': column_type})
# Print the table name, then one line per column.
print(f"表名称: {table_name}")
for column in columns:
    print(f"字段名称: {column['name']}, 字段类型: {column['type']}")

14
docker-compose.yaml Normal file
View File

@ -0,0 +1,14 @@
version: '3.4'
services:
  sql-runner:
    build:
      context: .
      # Must match the file name in the repository exactly ("dockerfile",
      # lower-case); the lookup fails on case-sensitive filesystems otherwise.
      dockerfile: dockerfile
    restart: always
    container_name: sqllineage
    image: sqllineage:latest
    ports:
      - "8778:8778"
# docker-compose up --build

20
dockerfile Normal file
View File

@ -0,0 +1,20 @@
# Base image: Python 3.11 (Alpine) pulled from an Alibaba Cloud registry
FROM registry.cn-hangzhou.aliyuncs.com/yinzhou_docker_hub/python:3.11-alpine
# Set the working directory
WORKDIR /opt/sqllineage
# Set the time zone to Asia/Shanghai
ENV TZ=Asia/Shanghai
# Copy requirements.txt into the image
COPY requirements.txt .
# Install the dependencies (using the Tsinghua PyPI mirror as the index)
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# Copy the remaining files into the image
COPY . .
# Run the application
ENTRYPOINT ["python3", "sqllineage.py"]

BIN
requirements.txt Normal file

Binary file not shown.

34
sqllineage.py Normal file
View File

@ -0,0 +1,34 @@
from flask import Flask, render_template, request, jsonify
from utils.sql_parse import parse_create_table_sql
from utils.log import Log
# Flask application object; the route decorators below register on it.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/convert', methods=['POST'])
def convert_sql():
    """Run ``parse_create_table_sql`` on the posted form and return JSON.

    Expects the form fields ``sql`` and ``hologresConnection``.

    Returns:
        ``{'target_tables': ..., 'message': ...}`` on success.
        ``{'error': ...}`` with HTTP 400 when a form field is missing.
        ``{'error': ...}`` with HTTP 200 when the conversion raises (status
        unchanged so existing clients keep working).
    """
    # A fresh Log instance re-attaches handlers for the current day's file.
    log = Log().getlog()
    # .get() instead of indexing: a missing key would otherwise abort with
    # Flask's HTML 400 page, which a client expecting JSON cannot parse.
    sql_input = request.form.get('sql')
    hologres_connection = request.form.get('hologresConnection')
    log.info("SQL Input: %s", sql_input)
    log.info("SQL hologres_connection: %s", hologres_connection)

    supplied = (('sql', sql_input), ('hologresConnection', hologres_connection))
    missing = [field for field, value in supplied if value is None]
    if missing:
        result = {'error': 'Missing form field(s): ' + ', '.join(missing)}
        log.info("SQL result: %s", result)
        return jsonify(result), 400

    try:
        parsed_result = parse_create_table_sql(sql_input, hologres_connection)
        result = {
            'target_tables': parsed_result,
            'message': 'SQL processed successfully.'
        }
    except Exception as e:
        # Request boundary: report the message to the client, but keep the
        # full traceback in the log so failures can be diagnosed.
        log.exception("SQL conversion failed")
        result = {'error': str(e)}
    log.info("SQL result: %s", result)
    return jsonify(result)
if __name__ == '__main__':
    import os

    # Bind to 0.0.0.0 so the server can be reached from other hosts.
    # Debug mode turns on the interactive debugger, which allows code
    # execution by anyone who can reach the port, so it is opt-in through
    # FLASK_DEBUG instead of always enabled.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=8778, debug=debug_enabled)

116
templates/index.html Normal file
View File

@ -0,0 +1,116 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>SQL Processor</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
textarea, input[type="text"] { /* 统一文本区域和输入框样式 */
width: 100%;
height: 40px; /* 设置输入框高度 */
margin-bottom: 10px;
font-family: Arial, sans-serif; /* 确保字体一致 */
color: #333; /* 设置文本颜色 */
border: 1px solid #ccc; /* 边框样式 */
padding: 10px; /* 内边距 */
background-color: #f9f9f9; /* 背景颜色 */
}
textarea {
height: 200px; /* 设置文本区域高度 */
overflow-y: auto; /* 当内容超出时显示垂直滚动条 */
resize: none; /* 禁止用户调整大小 */
}
button {
display: block;
margin-top: 10px;
padding: 10px;
}
#resultArea {
margin-top: 20px;
display: none;
}
.output-box {
border: 1px solid #ccc;
padding: 10px;
background-color: #f9f9f9;
margin-bottom: 10px;
}
h1, h2 { /* 统一处理所有标题样式 */
font-family: Arial, sans-serif;
color: #333; /* 设置文本颜色 */
margin-top: 0; /* 移除顶部间距 */
}
</style>
</head>
<body>
<h1>SQL Processor</h1>
<form id="sqlForm">
<textarea id="sqlInput" placeholder="Enter your SQL here..."></textarea>
<label for="hologresConnection">Hologres Connection Info:</label>
<input type="text" id="hologresConnection" name="hologresConnection" value="hgprecn-cn-i7m2ssubq004-cn-hangzhou-internal.hologres.aliyuncs.com:80" placeholder="Enter Hologres connection info...">
<button type="submit">Convert</button>
</form>
<div id="resultArea" class="output-box">
<!-- Processed SQL will be inserted here -->
</div>
<script>
// Submit handler: POST the SQL and connection info to /convert and render
// the JSON reply (or any failure) inside #resultArea.
document.getElementById('sqlForm').addEventListener('submit', function(event) {
    event.preventDefault();
    const resultArea = document.getElementById('resultArea');
    const sql = document.getElementById('sqlInput').value;
    const hologresConnection = document.getElementById('hologresConnection').value;

    // Show an error message in the result box and make the box visible.
    function showError(text) {
        resultArea.innerHTML = `<textarea readonly>${replaceNewline(escapeHtml(String(text)))}</textarea>`;
        resultArea.style.display = 'block';
    }

    fetch('/convert', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        body: new URLSearchParams({
            'sql': sql,
            'hologresConnection': hologresConnection // sent together with the SQL
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.error) {
            showError(data.error);
            return;
        }
        const tables = data.target_tables;
        // A string result is shown verbatim. Passing it through JSON.stringify
        // (as before) left JSON escapes such as \" and \\ in the displayed SQL.
        const renderedTables = typeof tables === 'string'
            ? escapeHtml(tables)
            : replaceNewline(escapeHtml(JSON.stringify(tables, null, 2)));
        const message = data.message == null ? '' : String(data.message);
        resultArea.innerHTML = `
<h2>Processed SQL:</h2>
<textarea readonly>${renderedTables}</textarea>
<p>${replaceNewline(escapeHtml(message))}</p>
`;
        // Reveal the result area.
        resultArea.style.display = 'block';
    })
    .catch(error => {
        // Network failures and non-JSON replies end up here; surface them to
        // the user instead of only logging to the console.
        console.error('Error:', error);
        showError(`Request failed: ${error}`);
    });
});
// Escape the HTML-significant characters & < > " ' so text can be embedded
// in markup. Non-string input is converted with String(); null and undefined
// become the empty string instead of throwing.
function escapeHtml(unsafe) {
    const text = unsafe == null ? '' : String(unsafe);
    return text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;");
}
// Turn every literal backslash-n sequence (two characters) into a real
// line break.
function replaceNewline(str) {
    return str.replace(/\\n/g, '\n');
}
</script>
</body>
</html>

108
templates/index1.html Normal file
View File

@ -0,0 +1,108 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>SQL Processor</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
textarea {
width: 100%;
height: 200px; /* 设置默认高度 */
margin-bottom: 10px;
font-family: Arial, sans-serif; /* 确保字体一致 */
color: #333; /* 设置文本颜色 */
resize: none; /* 禁止用户调整大小 */
overflow-y: auto; /* 当内容超出时显示垂直滚动条 */
border: 1px solid #ccc; /* 边框样式 */
padding: 10px; /* 内边距 */
background-color: #f9f9f9; /* 背景颜色 */
}
button {
display: block;
margin-top: 10px;
padding: 10px;
}
#resultArea {
margin-top: 20px;
display: none;
}
.output-box {
border: 1px solid #ccc;
padding: 10px;
background-color: #f9f9f9;
margin-bottom: 10px;
}
h1, h2 { /* 统一处理所有标题样式 */
font-family: Arial, sans-serif;
color: #333; /* 设置文本颜色 */
margin-top: 0; /* 移除顶部间距 */
}
</style>
</head>
<body>
<h1>SQL Processor</h1>
<form id="sqlForm">
<textarea id="sqlInput" placeholder="Enter your SQL here..."></textarea>
<button type="submit">Convert</button>
</form>
<div id="resultArea" class="output-box">
<!-- Processed SQL will be inserted here -->
</div>
<script>
// Submit handler: POST the SQL to /convert and render the JSON reply (or any
// failure) inside #resultArea.
document.getElementById('sqlForm').addEventListener('submit', function(event) {
    event.preventDefault();
    const resultArea = document.getElementById('resultArea');
    const sql = document.getElementById('sqlInput').value;

    // Show an error message in the result box and make the box visible.
    function showError(text) {
        resultArea.innerHTML = `<textarea readonly>${replaceNewline(escapeHtml(String(text)))}</textarea>`;
        resultArea.style.display = 'block';
    }

    fetch('/convert', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
        },
        body: new URLSearchParams({
            'sql': sql
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.error) {
            showError(data.error);
            return;
        }
        const tables = data.target_tables;
        // A string result is shown verbatim. Passing it through JSON.stringify
        // (as before) left JSON escapes such as \" and \\ in the displayed SQL.
        const renderedTables = typeof tables === 'string'
            ? escapeHtml(tables)
            : replaceNewline(escapeHtml(JSON.stringify(tables, null, 2)));
        const message = data.message == null ? '' : String(data.message);
        resultArea.innerHTML = `
<h2>Processed SQL:</h2>
<textarea readonly>${renderedTables}</textarea>
<p>${replaceNewline(escapeHtml(message))}</p>
`;
        // Reveal the result area.
        resultArea.style.display = 'block';
    })
    .catch(error => {
        // Network failures and non-JSON replies end up here; surface them to
        // the user instead of only logging to the console.
        console.error('Error:', error);
        showError(`Request failed: ${error}`);
    });
});
// Escape the HTML-significant characters & < > " ' so text can be embedded
// in markup. Non-string input is converted with String(); null and undefined
// become the empty string instead of throwing.
function escapeHtml(unsafe) {
    const text = unsafe == null ? '' : String(unsafe);
    return text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;");
}
// Turn every literal backslash-n sequence (two characters) into a real
// line break.
function replaceNewline(str) {
    return str.replace(/\\n/g, '\n');
}
</script>
</body>
</html>

58
utils/log.py Normal file
View File

@ -0,0 +1,58 @@
import logging
import os
from datetime import datetime
# 定义全局变量 log_path
cur_path = os.path.dirname(os.path.realpath(__file__))
log_path = os.path.join(os.path.dirname(cur_path), 'logs')
class Log():
    """Configure a named logger that writes to a per-day file and the console.

    Creating an instance (re)configures the ``logging`` logger called
    *logger_name*: handlers already attached to it are detached and closed,
    then a UTF-8 file handler for ``<log_dir>/<YYYY_MM_DD>.log`` and a stream
    handler are attached, both at INFO level.

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        log_dir: Directory for the log files. Defaults to the module-level
            ``log_path``. Created if it does not exist.
    """

    def __init__(self, logger_name='my_logger', log_dir=None):
        # The module-level default is only looked up when no directory is given.
        self.log_dir = log_path if log_dir is None else log_dir
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.INFO)
        os.makedirs(self.log_dir, exist_ok=True)
        self.update_log_file()

    def _drop_handlers(self):
        """Detach and close every handler currently attached to the logger."""
        for handler in self.logger.handlers[:]:
            self.logger.removeHandler(handler)
            # Removing alone leaves the log file open; close() releases it.
            handler.close()

    def update_log_file(self):
        """Point the logger at today's log file, replacing the old handlers."""
        current_date = datetime.now().strftime("%Y_%m_%d")
        self.log_name = os.path.join(self.log_dir, f'{current_date}.log')
        self._drop_handlers()

        formatter = logging.Formatter('[%(asctime)s] %(filename)s line:%(lineno)d [%(levelname)s]%(message)s')
        file_handler = logging.FileHandler(self.log_name, 'a', encoding='utf-8')
        console_handler = logging.StreamHandler()
        for handler in (file_handler, console_handler):
            handler.setLevel(logging.INFO)
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def getlog(self):
        """Return the logger, switching to a new file if the date has changed."""
        today = datetime.now().strftime("%Y_%m_%d")
        log_date = os.path.basename(self.log_name).split('.')[0]
        if today != log_date:
            self.update_log_file()
        return self.logger

    def info(self, msg, *args, **kwargs):
        """Log *msg* at INFO level via ``getlog()``."""
        # stacklevel=2 makes %(filename)s / %(lineno)d report the caller of
        # this wrapper rather than this method itself.
        kwargs.setdefault('stacklevel', 2)
        self.getlog().info(msg, *args, **kwargs)
if __name__ == "__main__":
    # Manual smoke test: emit one record each at INFO, ERROR and WARNING.
    smoke_logger = Log().getlog()
    smoke_logger.info("---测试开始----")
    smoke_logger.error("操作步骤1,2,3")
    smoke_logger.warning("----测试结束----")

85
utils/sql_parse.py Normal file
View File

@ -0,0 +1,85 @@
import sqlparse
import sqlglot
from sqlglot.expressions import ColumnDef
from utils.log import Log
def odps(schema,table_name,columns,colmapping,hologres_connection):
    """Render the ``CREATE EXTERNAL TABLE`` statement text for one table.

    Args:
        schema: Inserted into the JDBC URL path and its ``currentSchema`` option.
        table_name: Inserted as the external table name and the URL ``table`` option.
        columns: Pre-formatted column definitions placed between the parentheses.
        colmapping: Value of the ``odps.federation.jdbc.colmapping`` property.
        hologres_connection: Host/port part of the JDBC URL.

    Returns:
        The statement as a string that starts and ends with a newline.
    """
    jdbc_location = (
        f"jdbc:postgresql://{hologres_connection}/{schema}"
        f"?ApplicationName=MaxCompute&currentSchema={schema}"
        f"&preferQueryMode=simple&useSSL=false&table={table_name}/"
    )
    # The empty first and last entries produce the leading/trailing newline.
    ddl_lines = [
        "",
        f"CREATE EXTERNAL TABLE IF NOT EXISTS {table_name}",
        "(",
        f"{columns}",
        ")",
        "STORED BY 'com.aliyun.odps.jdbc.JdbcStorageHandler'",
        "-- ip设置成经典网络ip 加Schema 加表名",
        f"location '{jdbc_location}'",
        "TBLPROPERTIES (",
        "'mcfed.mapreduce.jdbc.driver.class'='org.postgresql.Driver',",
        "'odps.federation.jdbc.target.db.type'='holo',",
        '-- 格式为MaxCompute字段1 : "Hologres字段1",MaxCompute字段2 : "Hologres字段2"',
        f"'odps.federation.jdbc.colmapping'='{colmapping}'",
        ");",
        "",
    ]
    return "\n".join(ddl_lines)
def extract_create_table(sql_script):
    """Return only the CREATE TABLE statements contained in *sql_script*.

    The script is split into statements with ``sqlparse``. Each statement is
    re-emitted with comments stripped and keywords lower-cased (no
    re-indentation), and kept when its text begins with ``CREATE TABLE``
    (compared case-insensitively, ignoring surrounding whitespace).

    Returns:
        The kept statements joined by newlines; an empty string if none match.
    """
    # No logger is created here: the previous unused Log() instance only
    # reconfigured the shared logger's handlers as a side effect.
    create_table_statements = []
    for statement in sqlparse.parse(sql_script):
        stripped = sqlparse.format(
            statement.value,
            strip_comments=True,
            reindent=False,
            keyword_case="lower"
        )
        # Skip statements that are empty once comments are removed.
        if not stripped.strip():
            continue
        # Change this prefix to select other statement types.
        if stripped.upper().strip().startswith("CREATE TABLE"):
            create_table_statements.append(stripped)
    return "\n".join(create_table_statements)
def parse_create_table_sql(create_table_sql,hologres_connection):
    """Build the external-table DDL for the CREATE TABLE found in the input.

    The first CREATE TABLE statement of *create_table_sql* is transpiled from
    the PostgreSQL dialect to Hive with ``sqlglot``, its column names and type
    names are collected (``TEXT`` is reported as ``STRING``), and the result
    is rendered through ``odps()``.

    Args:
        create_table_sql: SQL script expected to contain a CREATE TABLE statement.
        hologres_connection: Connection string forwarded unchanged to ``odps()``.

    Returns:
        The statement text produced by ``odps()``.

    Raises:
        ValueError: If the input contains no CREATE TABLE statement.
    """
    result = extract_create_table(create_table_sql)
    if not result.strip():
        # Fail early with a clear message instead of an unrelated error from
        # the transpile/parse calls below.
        raise ValueError("No CREATE TABLE statement found in the input SQL")
    re_create_table_sql = sqlglot.transpile(result, read="postgres", write="hive")[0]
    parsed = sqlglot.parse_one(re_create_table_sql, read='hive')
    # Table reference of the CREATE statement.
    table_name = parsed.this.this
    columns = []
    colmapping = []
    # find_all() yields the ColumnDef nodes themselves. Indexing the items
    # produced by walk() (the previous approach) only works on sqlglot
    # releases where walk() yields tuples.
    for column_def in parsed.find_all(ColumnDef):
        column_name = column_def.this.this
        column_type = column_def.args['kind'].this.name.upper()
        # Report TEXT columns as STRING.
        if column_type == 'TEXT':
            column_type = 'STRING'
        columns.append(column_name + " " + column_type)
        # Each column is mapped to the identically named source column.
        colmapping.append(column_name + ":" + column_name)
    columns_str = ",\n".join(columns)
    colmapping_str = ",".join(colmapping)
    name_parts = str(table_name).split('.')
    table_name_str = name_parts[-1]
    # NOTE(review): for an unqualified table name this is the table name
    # itself, not a schema -- confirm whether a default schema is wanted.
    schema = name_parts[0]
    return odps(schema,table_name_str,columns_str,colmapping_str,hologres_connection)