优化文字识别

This commit is contained in:
尹舟 2025-04-18 11:30:51 +08:00
parent 653322dd17
commit 8c738f747e
4 changed files with 78 additions and 2 deletions

View File

@ -7,6 +7,10 @@ services:
restart: always restart: always
container_name: douyin container_name: douyin
image: registry.cn-hangzhou.aliyuncs.com/yinzhou_docker_hub/douyin:yz1 image: registry.cn-hangzhou.aliyuncs.com/yinzhou_docker_hub/douyin:yz1
#环境变量需要替换成自己的 DashScope API Key,切勿将真实密钥提交到版本库
environment:
DASHSCOPE_API_KEY: sk-063b48fffb914d558ddcddc5166fd34d
ports: ports:
- "1314:1314" - "1314:1314"
depends_on: depends_on:

36
main.py
View File

@ -2,6 +2,7 @@ from fastapi import FastAPI, Query
import uvicorn import uvicorn
import requests import requests
from utils.response import success_response, error_400_response, error_503_response from utils.response import success_response, error_400_response, error_503_response
from utils.music_analysis import music_analysis
app = FastAPI(title="douyin文档", swagger_ui_parameters={ app = FastAPI(title="douyin文档", swagger_ui_parameters={
"defaultModelsExpandDepth": -1 "defaultModelsExpandDepth": -1
@ -11,6 +12,36 @@ DEFAULT_PC_VIDEO_URL = 'https://v.douyin.com/-NvlqBdIJo4/'
DEFAULT_mobile_VIDEO_URL = 'https://v.douyin.com/BCfMrTFPYGQ/' DEFAULT_mobile_VIDEO_URL = 'https://v.douyin.com/BCfMrTFPYGQ/'
@app.get('/douyin_video', tags=["抖音"], summary="返回视屏信息,最全的接口了")
def douyin_video(video_url: str = Query(DEFAULT_mobile_VIDEO_URL, min_length=10)):
    """Resolve a Douyin share link into its video URL, music URL and transcript.

    The share link is forwarded to the hybrid parsing service. When that
    service reports code 200, the ``wm_video_url`` field and the first entry
    of the music ``play_url.url_list`` are extracted, and the music URL is
    handed to ``music_analysis`` to obtain its text.

    Args:
        video_url: Douyin share link to resolve.

    Returns:
        ``success_response`` carrying ``video_url``, ``video_music`` and
        ``content`` on success; ``error_400_response`` when the payload
        cannot be parsed or the transcription raises; ``error_503_response``
        when the parsing service is unreachable or reports any other code.
    """
    print(video_url)
    if not video_url:
        return {"code": 400, "message": "An error occurred.", "data": "请指定video_url"}

    url = "http://douyin_tiktok_download_api:9579/api/hybrid/video_data"
    querystring = {"url": video_url, "minimal": "true"}
    try:
        # A timeout keeps a stalled upstream from hanging this worker forever.
        response = requests.request("GET", url, params=querystring, timeout=30)
    except requests.RequestException as e:
        print(e)
        return error_503_response({"data": "抖音风控稍后请求."})
    print(response.text)

    try:
        # Parse once, and only inside the try: a non-JSON body or a payload
        # without the expected fields must end in an error response rather
        # than an unhandled exception.
        payload = response.json()
        if payload['code'] == 200:
            data = payload['data']
            print(data['music']['play_url'])
            wm_video_url = data['video_data']['wm_video_url']
            video_music = data['music']['play_url']['url_list'][0]
            content = music_analysis(video_music)
            return success_response({
                "video_url": wm_video_url,
                "video_music": video_music,
                "content": content,
            })
    except Exception as e:
        print(e)
        return error_400_response({"data": "解析字段失败."})
    return error_503_response({"data": "抖音风控稍后请求."})
@app.get('/douyin_content', tags=["抖音"], summary="手机和pc获取文案") @app.get('/douyin_content', tags=["抖音"], summary="手机和pc获取文案")
def douyin_content(video_url: str = Query(DEFAULT_mobile_VIDEO_URL, min_length=10)): def douyin_content(video_url: str = Query(DEFAULT_mobile_VIDEO_URL, min_length=10)):
print(video_url) print(video_url)
@ -69,12 +100,13 @@ def douyin_phone(video_url: str = Query(DEFAULT_mobile_VIDEO_URL, min_length=10)
# 获取get请求参数 # 获取get请求参数
url = "http://douyin_tiktok_download_api:9579/api/hybrid/video_data" url = "http://douyin_tiktok_download_api:9579/api/hybrid/video_data"
querystring = {"url": video_url, "minimal": "true"} querystring = {"url": video_url, "minimal": "true"}
response = requests.request("GET", url, params=querystring) response = requests.request("GET", url, params=querystring)
print(response.json()['data']['author']['avatar_thumb']) print(response.text)
try: try:
if response.json()['code'] == 200: if response.json()['code'] == 200:
return success_response({"phone_url": response.json()['data']['author']['avatar_thumb']['url_list'] return success_response({"phone_url": response.json()['data']['video_data']['wm_video_url']
}) })
except Exception as e: except Exception as e:

Binary file not shown.

40
utils/music_analysis.py Normal file
View File

@ -0,0 +1,40 @@
import time
from http import HTTPStatus

import dashscope
import requests
from dashscope.audio.asr import Transcription
def music_analysis(music_url, poll_interval=1.0):
    """Transcribe the audio at *music_url* and return the recognised text.

    Submits an asynchronous ``paraformer-v2`` transcription task, polls it
    until it reports ``SUCCEEDED`` or ``FAILED``, downloads the JSON document
    referenced by the first result's ``transcription_url`` and concatenates
    the ``text`` of every entry in its ``transcripts`` list.

    Args:
        music_url: URL of the audio file to transcribe.
        poll_interval: Seconds to sleep between task status polls.

    Returns:
        The concatenated transcript text, or an empty string when the task
        did not yield a transcription URL or the download did not return
        HTTP 200.

    Raises:
        requests.RequestException: If downloading the transcription result
            fails at the network level or times out.
    """
    transcribe_response = Transcription.async_call(
        model='paraformer-v2',
        file_urls=[music_url],
        language_hints=['zh', 'en'],  # language_hints is only supported by the paraformer-v2 model
    )
    # Poll until the task reaches a terminal state. Stop on a non-OK HTTP
    # status too, so the task output is never dereferenced on an error reply,
    # and sleep between polls instead of spinning on the API.
    while (
        transcribe_response.status_code == HTTPStatus.OK
        and transcribe_response.output.task_status not in ('SUCCEEDED', 'FAILED')
    ):
        time.sleep(poll_interval)
        transcribe_response = Transcription.fetch(task=transcribe_response.output.task_id)

    text = ''
    if transcribe_response.status_code != HTTPStatus.OK:
        return text

    try:
        url = transcribe_response.output['results'][0]['transcription_url']
    except (KeyError, IndexError, TypeError):
        # The task produced no transcription URL, so there is nothing to download.
        return text
    print(url)

    # Fetch the transcription result document.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        data = response.json()
        text = ''.join(item['text'] for item in data['transcripts'])
    else:
        print(f"请求失败,状态码:{response.status_code}")
    return text
if __name__ == '__main__':
    # Manual smoke run: transcribe a single sample music track.
    sample_track = 'https://lf26-music-east.douyinstatic.com/obj/ies-music-hj/7494207652008839996.mp3'
    music_analysis(sample_track)