unified_python/tts/音频上传.py

import base64
import os
import requests


# Base URL of the speech service; train() and get_status() append their
# endpoint paths to it.
host = "https://openspeech.bytedance.com"


def train(appid, token, audio_path, spk_id, batch_size=1, timeout=60):
    """Upload audio samples to the voice-clone upload endpoint in batches.

    Args:
        appid: Application id, sent as ``appid`` in the request body.
        token: Access token, sent in the ``Authorization`` header.
        audio_path: Either a directory, whose ``.wav``/``.mp3``/``.flac``/
            ``.m4a`` files are uploaded (non-recursive, extension matched
            case-insensitively), or any other path, which is treated as a
            single audio file.
        spk_id: Speaker id, sent as ``speaker_id`` in the request body.
        batch_size: Number of files sent per request. Must be >= 1.
        timeout: Timeout in seconds handed to ``requests.post`` so a stalled
            connection cannot block forever.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        RuntimeError: If the service answers with a non-200 status code.
    """
    # Validate up front: a zero step makes range() fail with an opaque
    # message, and a negative step would silently upload nothing.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    url = host + "/api/v1/mega_tts/audio/upload"
    headers = {
        "Content-Type": "application/json",
        # NOTE(review): the "Bearer;" form (semicolon, no space) is kept
        # exactly as the original code sent it -- confirm against the API docs.
        "Authorization": "Bearer;" + token,
        "Resource-Id": "volc.megatts.voiceclone",
    }

    # Collect the audio files to upload.
    if os.path.isdir(audio_path):
        supported = ('.wav', '.mp3', '.flac', '.m4a')
        # os.listdir() returns entries in arbitrary order; sort so that the
        # batch composition is reproducible between runs.
        candidates = (
            os.path.join(audio_path, name)
            for name in sorted(os.listdir(audio_path))
        )
        audio_files = [
            path
            for path in candidates
            if os.path.isfile(path) and path.lower().endswith(supported)
        ]
    else:
        audio_files = [audio_path]

    # Report how many files were found.
    print(f"总共找到 {len(audio_files)} 个音频文件")

    # Upload batch by batch.
    batch_starts = range(0, len(audio_files), batch_size)
    for batch_no, start in enumerate(batch_starts, start=1):
        audios = []
        for file_path in audio_files[start:start + batch_size]:
            encoded_data, audio_format = encode_audio_file(file_path)
            audios.append({"audio_bytes": encoded_data, "audio_format": audio_format})

        data = {"appid": appid, "speaker_id": spk_id, "audios": audios, "source": 2, "language": 0, "model_type": 1}
        print(f"正在上传批次 {batch_no}，包含 {len(audios)} 个文件...")

        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        print("status code = ", response.status_code)
        if response.status_code != 200:
            raise RuntimeError("train请求错误:" + response.text)
        print("headers = ", response.headers)
        print(response.json())
        print("-" * 50)  # Separator so each batch's output is easy to tell apart.


def get_status(appid, token, spk_id, timeout=60):
    """Query the status endpoint for a speaker and print the JSON reply.

    Args:
        appid: Application id, sent as ``appid`` in the request body.
        token: Access token, sent in the ``Authorization`` header.
        spk_id: Speaker id, sent as ``speaker_id`` in the request body.
        timeout: Timeout in seconds handed to ``requests.post`` so a stalled
            connection cannot block forever.

    Returns:
        The decoded JSON body of the response (also printed to stdout).
    """
    url = host + "/api/v1/mega_tts/status"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer;" + token,
        "Resource-Id": "volc.megatts.voiceclone",
    }
    body = {"appid": appid, "speaker_id": spk_id}
    response = requests.post(url, headers=headers, json=body, timeout=timeout)
    # Decode once, then both print and return it so callers can act on it.
    status = response.json()
    print(status)
    return status


def encode_audio_file(file_path):
    """Read an audio file and return its Base64 text plus its format name.

    Args:
        file_path: Path of the file to read (opened in binary mode).

    Returns:
        A ``(encoded_data, audio_format)`` tuple: the file content as a
        Base64 ``str``, and the file extension without the leading dot,
        lower-cased (empty string when the path has no extension).
    """
    with open(file_path, 'rb') as audio_file:
        audio_data = audio_file.read()

    # Base64 output is pure ASCII, so decoding cannot fail.
    encoded_data = base64.b64encode(audio_data).decode("ascii")
    # The extension doubles as the format name. Lower-case it so that a file
    # such as "clip.WAV" reports "wav", matching the case-insensitive
    # extension filter used when collecting files for upload.
    audio_format = os.path.splitext(file_path)[1][1:].lower()
    return encoded_data, audio_format


if __name__ == "__main__":
    # NOTE(review): the app id and token below are credentials committed to
    # source. They are kept only as fallbacks so the script runs as before;
    # prefer supplying VOLC_APPID / VOLC_TOKEN / VOLC_SPEAKER_ID through the
    # environment, and rotate and remove the literals.
    appid = os.environ.get("VOLC_APPID", "9407991441")
    token = os.environ.get("VOLC_TOKEN", "VBI4pixTt-GaARTdacAAdQPrHMY333Di")
    spk_id = os.environ.get("VOLC_SPEAKER_ID", "S_xQVFJrvA1")

    # Upload every supported audio file in the sample directory, then print
    # the speaker's status.
    train(appid=appid, token=token, audio_path="./马保国语音包", spk_id=spk_id)
    get_status(appid=appid, token=token, spk_id=spk_id)
1 2025-08-06 17:26:07 +08:00			`import base64`
			`import os`
			`import requests`


			`host = "https://openspeech.bytedance.com"`


			`def train(appid, token, audio_path, spk_id, batch_size=1): # 将batch_size默认值改为1`
			`url = host + "/api/v1/mega_tts/audio/upload"`
			`headers = {`
			`"Content-Type": "application/json",`
			`"Authorization": "Bearer;" + token,`
			`"Resource-Id": "volc.megatts.voiceclone",`
			`}`

			`# 获取所有音频文件`
			`audio_files = []`
			`if os.path.isdir(audio_path):`
			`for file_name in os.listdir(audio_path):`
			`file_path = os.path.join(audio_path, file_name)`
			`if os.path.isfile(file_path) and file_name.lower().endswith(('.wav', '.mp3', '.flac', '.m4a')):`
			`audio_files.append(file_path)`
			`else:`
			`audio_files = [audio_path]`

			`# 添加文件数量信息输出`
			`print(f"总共找到 {len(audio_files)} 个音频文件")`

			`# 分批上传`
			`for i in range(0, len(audio_files), batch_size):`
			`batch_files = audio_files[i:i + batch_size]`
			`audios = []`

			`for file_path in batch_files:`
			`encoded_data, audio_format = encode_audio_file(file_path)`
			`audios.append({"audio_bytes": encoded_data, "audio_format": audio_format})`

			`data = {"appid": appid, "speaker_id": spk_id, "audios": audios, "source": 2, "language": 0, "model_type": 1}`
			`print(f"正在上传批次 {i // batch_size + 1}，包含 {len(audios)} 个文件...")`

			`response = requests.post(url, json=data, headers=headers)`
			`print("status code = ", response.status_code)`
			`if response.status_code != 200:`
			`raise Exception("train请求错误:" + response.text)`
			`print("headers = ", response.headers)`
			`print(response.json())`
			`print("-" * 50) # 分隔符，便于查看每批次结果`


			`def get_status(appid, token, spk_id):`
			`url = host + "/api/v1/mega_tts/status"`
			`headers = {`
			`"Content-Type": "application/json",`
			`"Authorization": "Bearer;" + token,`
			`"Resource-Id": "volc.megatts.voiceclone",`
			`}`
			`body = {"appid": appid, "speaker_id": spk_id}`
			`response = requests.post(url, headers=headers, json=body)`
			`print(response.json())`


			`def encode_audio_file(file_path):`
			`with open(file_path, 'rb') as audio_file:`
			`audio_data = audio_file.read()`
			`encoded_data = str(base64.b64encode(audio_data), "utf-8")`
			`audio_format = os.path.splitext(file_path)[1][1:] # 获取文件扩展名作为音频格式`
			`return encoded_data, audio_format`


			`if __name__ == "__main__":`
			`appid = "9407991441"`
			`token = "VBI4pixTt-GaARTdacAAdQPrHMY333Di"`
			`spk_id = "S_xQVFJrvA1"`
			`train(appid=appid, token=token, audio_path="./马保国语音包", spk_id=spk_id)`
			`get_status(appid=appid, token=token, spk_id=spk_id)`