douyin/utils/music_analysis.py

78 lines
2.6 KiB
Python
Raw Normal View History

2025-04-18 11:56:07 +08:00
import json
2025-04-18 11:30:51 +08:00
from http import HTTPStatus
from dashscope.audio.asr import Transcription
import dashscope
import requests
2025-04-18 11:33:24 +08:00
import os
2025-04-18 11:56:07 +08:00
from openai import OpenAI
2025-04-18 11:30:51 +08:00
2025-04-18 18:40:54 +08:00
# The DashScope SDK reads its credential from this module attribute. The key is
# taken from the DASHSCOPE_API_KEY environment variable; the literal is only
# the fallback used when that variable is unset.
dashscope.api_key = os.environ.get("DASHSCOPE_API_KEY", "*****")

# System prompt handed to the chat model by chat_analysis(). It is runtime
# text, so it is kept verbatim (one instruction line, a "# 输出" heading and
# one output-format line, wrapped in leading/trailing newlines).
content = (
    "\n"
    "将文本的链接提取出来只留链接的有效信息\n"
    "# 输出\n"
    "只输出链接不需要分析过程\n"
)
2025-04-18 11:30:51 +08:00
def music_analysis(music_url, *, poll_interval=1.0, request_timeout=30):
    """Transcribe the audio at ``music_url`` and return the recognised text.

    Submits an asynchronous ``paraformer-v2`` transcription task, polls it
    until it reaches a terminal state, downloads the JSON document referenced
    by the task's ``transcription_url`` and concatenates the ``text`` field of
    every entry in its ``transcripts`` list.

    Args:
        music_url: Publicly reachable URL of the audio file to transcribe.
        poll_interval: Seconds to wait between two status polls.
        request_timeout: Timeout in seconds for downloading the result file.

    Returns:
        The concatenated transcript text, or an empty string when the task
        did not succeed or the result file could not be downloaded.

    Raises:
        requests.RequestException: If downloading the result file fails at
            the transport level (including a timeout).
    """
    import time

    transcribe_response = Transcription.async_call(
        model='paraformer-v2',
        file_urls=[music_url],
        # "language_hints" is only supported by the paraformer-v2 model.
        language_hints=['zh', 'en'],
    )

    # Poll until the task is finished. Stop as soon as a call itself fails
    # (non-OK status) so that `.output` is not dereferenced on an error
    # response, and sleep between polls instead of spinning on the API.
    while (
        transcribe_response.status_code == HTTPStatus.OK
        and transcribe_response.output.task_status not in ('SUCCEEDED', 'FAILED')
    ):
        time.sleep(poll_interval)
        transcribe_response = Transcription.fetch(
            task=transcribe_response.output.task_id
        )

    text = ''
    if transcribe_response.status_code != HTTPStatus.OK:
        print(f"转写请求失败,状态码:{transcribe_response.status_code}")
        return text
    if transcribe_response.output.task_status != 'SUCCEEDED':
        # Only a successful task is read for its transcription URL.
        print(f"转写任务失败,任务状态:{transcribe_response.output.task_status}")
        return text

    url = transcribe_response.output['results'][0]['transcription_url']
    print(url)

    # Fetch the transcription result file; the timeout keeps a stalled
    # download from blocking the caller forever.
    response = requests.get(url, timeout=request_timeout)
    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return text

    # Parse the JSON payload and stitch all transcript segments together.
    data = response.json()
    return text + ''.join(item['text'] for item in data['transcripts'])
2025-04-18 18:40:54 +08:00
2025-04-18 11:56:07 +08:00
def chat_analysis(video_url):
    """Send ``video_url`` to the ``qwen-plus`` chat model and return its reply.

    The module-level ``content`` string is used as the system prompt and
    ``video_url`` (typically a share text containing a link) as the user
    message. The full response dump is printed, and the ``content`` of every
    returned choice is concatenated into the result.

    Args:
        video_url: Text passed to the model as the user message.

    Returns:
        The concatenated message content of all choices in the completion.
    """
    # If the environment variable is not configured, replace the fallback
    # below with a Bailian API key, i.e. api_key="sk-xxx".
    llm = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY", "*****"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    conversation = [
        {'role': 'system', 'content': content},
        {'role': 'user', 'content': video_url},
    ]

    # qwen-plus is used as an example; swap the model name as needed.
    # Model list: https://help.aliyun.com/zh/model-studio/getting-started/models
    completion = llm.chat.completions.create(
        model="qwen-plus",
        messages=conversation,
    )

    dumped = completion.model_dump()
    print(f'语言模型返回数据: {dumped}')
    return ''.join(choice['message']['content'] for choice in dumped['choices'])
2025-04-18 11:56:07 +08:00
2025-04-18 18:40:54 +08:00
def video_url(video_url):
    """Print the given value to standard output and return ``None``."""
    print(video_url)
    return None
2025-04-18 11:30:51 +08:00
if __name__ == '__main__':
    # Manual run when the module is executed as a script.
    # The transcription path can be tried with a call such as:
    # music_analysis('https://lf26-music-east.douyinstatic.com/obj/ies-music-hj/7494207652008839996.mp3')

    # Sample Douyin share text from which the model should extract the link.
    share_text = '原始视频url为9.79 复制打开抖音,看看【学钓鱼的佳琪的作品】少与人纠缠,多跟鱼拉扯 # dou是钓鱼人 # 爱... https://v.douyin.com/1skvRRYPEgA/ DHI:/ n@D.uf 05/06'
    chat_analysis(share_text)