"""Upload audio samples to the MegaTTS voice-clone endpoint and query status.

The script posts base64-encoded audio files, in batches, to the
``/api/v1/mega_tts/audio/upload`` endpoint and then asks
``/api/v1/mega_tts/status`` for the state of the given speaker id.
"""

import base64
import os

import requests

host = "https://openspeech.bytedance.com"

# File extensions (lower-case) picked up when ``audio_path`` is a directory.
_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac', '.m4a')

# (connect, read) timeouts in seconds, so a stalled connection cannot block
# the script forever. The read timeout is generous because uploads carry
# whole audio files in the request body.
_REQUEST_TIMEOUT = (10, 300)


def _build_headers(token):
    """Return the HTTP headers shared by the upload and status requests."""
    return {
        "Content-Type": "application/json",
        "Authorization": "Bearer;" + token,
        "Resource-Id": "volc.megatts.voiceclone",
    }


def _collect_audio_files(audio_path):
    """Return the audio file paths designated by ``audio_path``.

    A directory is scanned (non-recursively) for files whose extension is in
    ``_AUDIO_EXTENSIONS``; any other path is returned as a single-item list
    without further checks.
    """
    if not os.path.isdir(audio_path):
        return [audio_path]

    audio_files = []
    # os.listdir() order is arbitrary; sort so batches are reproducible.
    for file_name in sorted(os.listdir(audio_path)):
        file_path = os.path.join(audio_path, file_name)
        if os.path.isfile(file_path) and file_name.lower().endswith(_AUDIO_EXTENSIONS):
            audio_files.append(file_path)
    return audio_files


def train(appid, token, audio_path, spk_id, batch_size=1):
    """Upload the audio under ``audio_path`` for speaker ``spk_id`` in batches.

    Args:
        appid: Application id sent in the request body.
        token: Access token placed in the ``Authorization`` header.
        audio_path: A single audio file, or a directory that is scanned for
            ``.wav``/``.mp3``/``.flac``/``.m4a`` files.
        spk_id: Speaker id sent as ``speaker_id``.
        batch_size: Number of files per upload request (default 1).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: If any upload request returns a non-200 status code.
    """
    if batch_size < 1:
        # range() would raise an opaque error for 0 and silently upload
        # nothing for negative values; fail early with a clear message.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    url = host + "/api/v1/mega_tts/audio/upload"
    headers = _build_headers(token)

    audio_files = _collect_audio_files(audio_path)

    # Report how many files were found.
    print(f"总共找到 {len(audio_files)} 个音频文件")

    # Upload batch by batch.
    for i in range(0, len(audio_files), batch_size):
        batch_files = audio_files[i:i + batch_size]
        audios = []
        for file_path in batch_files:
            encoded_data, audio_format = encode_audio_file(file_path)
            audios.append({"audio_bytes": encoded_data, "audio_format": audio_format})

        # NOTE(review): the meaning of source/language/model_type is defined
        # by the remote API and is not visible here; values kept as-is.
        data = {
            "appid": appid,
            "speaker_id": spk_id,
            "audios": audios,
            "source": 2,
            "language": 0,
            "model_type": 1,
        }

        print(f"正在上传批次 {i // batch_size + 1},包含 {len(audios)} 个文件...")
        response = requests.post(url, json=data, headers=headers, timeout=_REQUEST_TIMEOUT)
        print("status code = ", response.status_code)
        if response.status_code != 200:
            raise Exception("train请求错误:" + response.text)
        print("headers = ", response.headers)
        print(response.json())
        print("-" * 50)  # Separator to make per-batch output easier to read.


def get_status(appid, token, spk_id):
    """Query and print the status of speaker ``spk_id``.

    Args:
        appid: Application id sent in the request body.
        token: Access token placed in the ``Authorization`` header.
        spk_id: Speaker id sent as ``speaker_id``.

    Returns:
        The decoded JSON body of the status response.

    Raises:
        Exception: If the request returns a non-200 status code.
    """
    url = host + "/api/v1/mega_tts/status"
    headers = _build_headers(token)
    body = {"appid": appid, "speaker_id": spk_id}
    response = requests.post(url, headers=headers, json=body, timeout=_REQUEST_TIMEOUT)
    # Mirror train(): surface HTTP failures instead of trying to decode an
    # error page as JSON.
    if response.status_code != 200:
        raise Exception("get_status请求错误:" + response.text)
    result = response.json()
    print(result)
    return result


def encode_audio_file(file_path):
    """Read ``file_path`` and return ``(base64_text, audio_format)``.

    ``audio_format`` is the file extension without the leading dot,
    lower-cased so that e.g. ``clip.WAV`` is reported as ``"wav"`` (the
    directory scan already matches extensions case-insensitively).
    """
    with open(file_path, 'rb') as audio_file:
        audio_data = audio_file.read()
    encoded_data = base64.b64encode(audio_data).decode("utf-8")
    # Use the file extension as the audio format.
    audio_format = os.path.splitext(file_path)[1][1:].lower()
    return encoded_data, audio_format


if __name__ == "__main__":
    # NOTE(security): credentials are hard-coded in source. They should be
    # moved to environment variables or a secrets store and the token rotated.
    appid = "9407991441"
    token = "VBI4pixTt-GaARTdacAAdQPrHMY333Di"
    spk_id = "S_xQVFJrvA1"
    train(appid=appid, token=token, audio_path="./马保国语音包", spk_id=spk_id)
    get_status(appid=appid, token=token, spk_id=spk_id)