2025-07-25 16:59:34 +08:00
|
|
|
|
import requests
|
|
|
|
|
import html2text
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_entries(entries, result_list):
    """Recursively collect the leaf entries of a nested menu structure.

    An entry is treated as a leaf when it has no ``children`` key or when
    that value is empty. For every leaf, a dict with the keys ``title``,
    ``scm`` and ``url`` is appended to ``result_list``; the ``url`` value is
    the entry's own ``url`` prefixed with ``https://help.aliyun.com``.

    :param entries: list of entry dicts at the current nesting level;
        ``None`` or an empty list yields nothing
    :param result_list: list that receives the results (mutated in place)
    """
    for entry in entries or ():
        children = entry.get("children")
        if children:
            # Not a leaf: descend into the child entries.
            extract_entries(children, result_list)
            continue

        # Leaf entry: pick the target fields and build the absolute URL.
        # ``or ""`` also covers an explicit null ``url``, which would
        # otherwise make the string concatenation raise TypeError.
        result_list.append({
            "title": entry.get("title", ""),
            "scm": entry.get("scm", ""),
            "url": "https://help.aliyun.com" + (entry.get("url") or ""),
        })
|
|
|
|
|
|
|
|
|
|
|
2025-07-18 13:52:29 +08:00
|
|
|
|
def main():
    """Fetch the Hologres help menu and save every leaf page as Markdown.

    Downloads the menu JSON, flattens it with ``extract_entries`` and then
    calls ``md`` once per leaf URL, passing a 1-based running index that
    ``md`` uses as the output file name.

    :raises requests.HTTPError: if the menu request returns an error status
    """
    url = "https://help.aliyun.com/help/json/menupath.json?alias=%2Fhologres%2Fproduct-overview%2F&website=cn&language=zh"
    # A timeout keeps the script from blocking forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Surface HTTP failures directly instead of as a confusing JSON/KeyError.
    response.raise_for_status()
    data = response.json()

    result = []
    extract_entries(data['data']['children'], result)  # recurse from the top-level children

    # Print progress (URL and running index) and convert each page.
    for i, item in enumerate(result, start=1):
        print(item['url'])
        print(i)
        md(item['url'], i)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def md(url, title):
    """Download a page, convert its HTML to Markdown and save it to disk.

    The converted text is printed and then written to ``./md/<title>.md``
    (the directory is created when missing). The HTTP status code is not
    checked: whatever body the server returns is converted and saved.

    :param url: address of the page to download
    :param title: value used as the output file name (without extension)
    """
    # A timeout keeps one stalled page from hanging the whole run.
    response = requests.get(url, timeout=30)
    html_content = response.content.decode('utf-8')  # decode the raw bytes to str

    # Set up the html2text converter; links are kept in the output.
    converter = html2text.HTML2Text()
    converter.ignore_links = False
    # Intended to select ATX-style headings ("# Heading").
    # NOTE(review): confirm that html2text actually honours this attribute.
    converter.heading_style = "ATX"

    markdown_text = converter.handle(html_content)
    print(markdown_text)

    # Make sure the output directory exists.
    os.makedirs("./md", exist_ok=True)

    # Save the Markdown file.
    with open(f"./md/{title}.md", "w", encoding="utf-8") as f:
        f.write(markdown_text)
|
2025-07-18 13:52:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Single-page debugging call, kept for reference:
    # md('https://help.aliyun.com/zh/hologres/product-overview/what-is-hologres', 1)
    main()
|