77 lines
2.7 KiB
Python
77 lines
2.7 KiB
Python
import base64
|
|
import os
|
|
import requests
|
|
|
|
|
|
# Base URL of the speech service; each request function below appends its
# endpoint path to this value.
host = "https://openspeech.bytedance.com"
|
|
|
|
|
|
def train(appid, token, audio_path, spk_id, batch_size=1, timeout=120):
    """Upload training audio for a speaker to the audio upload endpoint.

    ``audio_path`` may be a directory or a single file. For a directory,
    every regular file directly inside it whose name ends in ``.wav``,
    ``.mp3``, ``.flac`` or ``.m4a`` (case-insensitive) is uploaded, in sorted
    name order. Any other path is uploaded as a single file. Files are sent
    in consecutive groups of ``batch_size``, one POST request per group, and
    progress is printed to stdout.

    Args:
        appid: Application id sent in the request body.
        token: Access token sent in the ``Authorization`` header.
        audio_path: Directory of audio files, or the path of one audio file.
        spk_id: Speaker id sent in the request body as ``speaker_id``.
        batch_size: Number of files per request; must be at least 1.
        timeout: Timeout passed to ``requests.post`` for every upload, so a
            stalled connection cannot block the script forever.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: If an upload request returns a non-200 status code.
    """
    if batch_size < 1:
        # range() rejects a zero step with an unrelated-looking message and
        # silently yields nothing for a negative one; fail clearly instead.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    url = host + "/api/v1/mega_tts/audio/upload"
    headers = {
        "Content-Type": "application/json",
        # The semicolon after "Bearer" is kept exactly as the original had it.
        # NOTE(review): presumably the format this service expects - confirm
        # against the API documentation before "correcting" it.
        "Authorization": "Bearer;" + token,
        "Resource-Id": "volc.megatts.voiceclone",
    }

    # Collect the audio files to upload.
    audio_suffixes = ('.wav', '.mp3', '.flac', '.m4a')
    if os.path.isdir(audio_path):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # batch composition reproducible between runs.
        audio_files = [
            os.path.join(audio_path, file_name)
            for file_name in sorted(os.listdir(audio_path))
            if file_name.lower().endswith(audio_suffixes)
            and os.path.isfile(os.path.join(audio_path, file_name))
        ]
    else:
        audio_files = [audio_path]

    print(f"总共找到 {len(audio_files)} 个音频文件")

    # Upload in batches.
    batch_starts = range(0, len(audio_files), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        audios = []
        for file_path in audio_files[start:start + batch_size]:
            encoded_data, audio_format = encode_audio_file(file_path)
            audios.append({"audio_bytes": encoded_data, "audio_format": audio_format})

        data = {
            "appid": appid,
            "speaker_id": spk_id,
            "audios": audios,
            "source": 2,
            "language": 0,
            "model_type": 1,
        }
        print(f"正在上传批次 {batch_number},包含 {len(audios)} 个文件...")

        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        print("status code = ", response.status_code)
        if response.status_code != 200:
            raise Exception("train请求错误:" + response.text)
        print("headers = ", response.headers)
        print(response.json())
        print("-" * 50)  # Separator between the output of successive batches.
|
|
|
|
|
|
def get_status(appid, token, spk_id, timeout=30):
    """Query the status endpoint for a speaker, print and return the reply.

    Args:
        appid: Application id sent in the request body.
        token: Access token sent in the ``Authorization`` header.
        spk_id: Speaker id sent in the request body as ``speaker_id``.
        timeout: Timeout passed to ``requests.post`` so a stalled connection
            cannot block the script forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the request returns a non-200 status code (the same
            handling ``train`` applies to its upload requests).
    """
    url = host + "/api/v1/mega_tts/status"
    headers = {
        "Content-Type": "application/json",
        # The semicolon after "Bearer" is kept exactly as the original had it.
        # NOTE(review): presumably the format this service expects - confirm
        # against the API documentation before "correcting" it.
        "Authorization": "Bearer;" + token,
        "Resource-Id": "volc.megatts.voiceclone",
    }
    body = {"appid": appid, "speaker_id": spk_id}

    response = requests.post(url, headers=headers, json=body, timeout=timeout)
    if response.status_code != 200:
        # Surface the raw body: an error reply is not guaranteed to be JSON.
        raise Exception("get_status请求错误:" + response.text)

    result = response.json()
    print(result)
    return result
|
|
|
|
|
|
def encode_audio_file(file_path):
    """Read a file and return its base64 text together with its format name.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A ``(encoded_data, audio_format)`` tuple. ``encoded_data`` is the
        file's bytes encoded as a base64 string. ``audio_format`` is the file
        extension without the leading dot, lower-cased; it is ``""`` when the
        path has no extension.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, "rb") as audio_file:
        audio_data = audio_file.read()

    # Base64 output is pure ASCII, so decoding cannot fail.
    encoded_data = base64.b64encode(audio_data).decode("ascii")

    # train() matches extensions case-insensitively, so a file such as
    # "clip.WAV" reaches this function; report its format as "wav".
    audio_format = os.path.splitext(file_path)[1][1:].lower()
    return encoded_data, audio_format
|
|
|
|
|
|
if __name__ == "__main__":
    # The access token is read from the environment instead of being stored
    # in this file, so it is not committed or shared along with the script.
    # NOTE(review): the token that used to be hard-coded here should be
    # treated as exposed and rotated.
    appid = os.environ.get("VOLC_APPID", "9407991441")
    token = os.environ.get("VOLC_TOKEN")
    spk_id = os.environ.get("VOLC_SPEAKER_ID", "S_xQVFJrvA1")
    if not token:
        raise SystemExit("Set the VOLC_TOKEN environment variable to the access token.")

    train(appid=appid, token=token, audio_path="./马保国语音包", spk_id=spk_id)
    get_status(appid=appid, token=token, spk_id=spk_id)
|
|
|