"""Download the Hologres product-overview pages from the Aliyun help centre.

The script fetches the help-centre menu tree as JSON, collects every leaf
entry (an entry without children), downloads each leaf page and stores it as
a Markdown file under ``./md``.
"""

import os
import sys

import html2text
import requests

# Prefix prepended to the relative ``url`` field of every menu entry.
BASE_URL = "https://help.aliyun.com"

# JSON endpoint that returns the menu tree for the product-overview section.
MENU_URL = (
    "https://help.aliyun.com/help/json/menupath.json"
    "?alias=%2Fhologres%2Fproduct-overview%2F&website=cn&language=zh"
)

# Directory that receives the generated Markdown files.
OUTPUT_DIR = "./md"

# Seconds to wait for the server; without a timeout ``requests`` can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 30


def extract_entries(entries, result_list=None):
    """Recursively collect the leaf entries of a nested menu structure.

    An entry is a leaf when it has no ``"children"`` key or when that value
    is empty. For every leaf a dict with ``title``, ``scm`` and the absolute
    ``url`` is appended to ``result_list``; missing fields default to ``""``.

    :param entries: iterable of entry dicts at the current nesting level
    :param result_list: list that receives the results; it is mutated in
        place. When omitted, a new list is created.
    :return: the list holding the collected leaf entries (``result_list``
        itself when one was supplied)
    """
    if result_list is None:
        result_list = []

    for entry in entries:
        children = entry.get("children")
        if children:
            # Inner node: descend, accumulating into the same list.
            extract_entries(children, result_list)
            continue

        # Leaf node: keep the fields of interest and build the absolute URL.
        result_list.append({
            "title": entry.get("title", ""),
            "scm": entry.get("scm", ""),
            "url": BASE_URL + entry.get("url", ""),
        })

    return result_list


def main():
    """Fetch the menu tree and save every leaf page as a numbered Markdown file.

    Pages are numbered from 1 in traversal order, and that number is used as
    the output file name. A page that cannot be downloaded is reported on
    stderr and skipped so that one failure does not abort the whole run.

    :raises requests.RequestException: if the menu tree itself cannot be
        fetched
    """
    response = requests.get(MENU_URL, timeout=REQUEST_TIMEOUT)
    # Fail early on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()

    # Start the recursion from the top-level children of the menu tree.
    result = extract_entries(data['data']['children'], [])

    for index, item in enumerate(result, start=1):
        print(item['url'])
        print(index)
        try:
            md(item['url'], index)
        except requests.RequestException as exc:
            print(f"failed to download {item['url']}: {exc}", file=sys.stderr)


def md(url, title):
    """Download ``url``, convert its HTML to Markdown, print it and save it.

    The Markdown is written to ``./md/<title>.md`` (UTF-8); the directory is
    created when it does not exist.

    :param url: absolute URL of the page to download
    :param title: value used as the output file name (without extension)
    :raises requests.RequestException: on a network failure, a timeout or an
        HTTP error status
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Do not convert and store an HTTP error page as if it were the document.
    response.raise_for_status()
    html_content = response.content.decode('utf-8')  # bytes -> str

    converter = html2text.HTML2Text()
    converter.ignore_links = False
    # NOTE(review): intended to select ATX ("# Heading") titles; confirm that
    # html2text actually reads this attribute.
    converter.heading_style = "ATX"
    markdown_text = converter.handle(html_content)
    print(markdown_text)

    # Make sure the output directory exists before writing into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with open(f"{OUTPUT_DIR}/{title}.md", "w", encoding="utf-8") as f:
        f.write(markdown_text)


if __name__ == "__main__":
    # To convert a single page instead of crawling the whole menu, call e.g.
    # md('https://help.aliyun.com/zh/hologres/product-overview/what-is-hologres', 1)
    main()