Speech 2.6 HD
The model generates speech from text prompts in a choice of voices and is optimized for high-fidelity, natural-sounding output.
Set up your API Key
If you don’t have an API key for the AI/ML API yet, feel free to use our Quickstart guide.
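A minimal sketch of wiring the key into your code, assuming you have stored it in an environment variable; the variable name AIMLAPI_API_KEY below is only an example, not something the API requires.

import os

# Read the key from an environment variable so it never ends up in source control.
# The variable name AIMLAPI_API_KEY is an assumption; use whatever name you set.
api_key = os.environ["AIMLAPI_API_KEY"]

# Reuse this header in the requests shown below.
headers = {"Authorization": f"Bearer {api_key}"}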
Code Example
Python

import os
import requests


def main():
    url = "https://api.aimlapi.com/v1/tts"
    headers = {
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    }
    payload = {
        "model": "minimax/speech-2.6-hd",
        "text": "Hi! What are you doing today?",
        "voice_setting": {
            "voice_id": "Wise_Woman"
        }
    }

    response = requests.post(url, headers=headers, json=payload, stream=True)

    dist = os.path.abspath("your_file_name.wav")
    with open(dist, "wb") as write_stream:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                write_stream.write(chunk)

    print("Audio saved to:", dist)


main()

JavaScript

import fs from "fs";
import path from "path";

async function main() {
  const url = "https://api.aimlapi.com/v1/tts";
  const payload = {
    model: "minimax/speech-2.6-hd",
    text: "Hi! What are you doing today?",
    voice_setting: {
      voice_id: "Wise_Woman"
    }
  };

  const response = await fetch(url, {
    method: "POST",
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
      "Authorization": `Bearer <YOUR_AIMLAPI_KEY>`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  });

  // Read the response as an ArrayBuffer and convert it to a Buffer
  const arrayBuffer = await response.arrayBuffer();
  const buffer = Buffer.from(arrayBuffer);

  // Save the audio to a file in the current working directory
  const dist = path.join(process.cwd(), "your_file_name.wav");
  fs.writeFileSync(dist, buffer);

  console.log("Audio saved to:", dist);
}

main();

Generation time: ~5.8 s.
API Schema

POST /v1/tts

Authorizations
Bearer token in the Authorization header (your AIML API key, as in the code examples above).

Body

model · enum · Required

text · string · min: 1 · max: 5000 · Required
The text content to be converted to speech.

stream · boolean · Optional · Default: false
Enable streaming mode for real-time audio generation. When enabled, audio is generated and delivered in chunks as it's processed.

language_boost · string · enum · Optional
Language recognition enhancement option.

subtitle_enable · boolean · Optional · Default: false
Enable subtitle generation service. Only available for non-streaming requests. Generates timing information for the synthesized speech.

output_format · string · enum · Optional · Default: hex
Format of the output content for non-streaming requests. Controls how the generated audio data is encoded in the response.

Responses

201 Success
application/json
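As a quick illustration of the optional body fields above, here is a minimal Python sketch of a non-streaming request; the values chosen for language_boost, subtitle_enable, and output_format simply mirror the request example below and can be changed as needed.

import requests

url = "https://api.aimlapi.com/v1/tts"
headers = {"Authorization": "Bearer <YOUR_AIMLAPI_KEY>"}

payload = {
    "model": "minimax/speech-2.6-hd",
    "text": "Hi! What are you doing today?",
    "voice_setting": {"voice_id": "Wise_Woman"},
    # Optional fields from the schema above; values mirror the request example below.
    "stream": False,              # non-streaming request
    "language_boost": "Chinese",  # language recognition enhancement
    "subtitle_enable": False,     # subtitles are available only for non-streaming requests
    "output_format": "hex",       # default encoding of the returned audio data
}

response = requests.post(url, headers=headers, json=payload)
print(response.status_code)  # 201 on success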
Request example

POST /v1/tts HTTP/1.1
Host: api.aimlapi.com
Authorization: Bearer YOUR_SECRET_TOKEN
Content-Type: application/json
Accept: */*
Content-Length: 509
{
  "model": "minimax/speech-2.6-hd",
  "text": "text",
  "voice_setting": {
    "voice_id": "Wise_Woman",
    "speed": 1,
    "vol": 1,
    "pitch": 0,
    "emotion": "happy",
    "text_normalization": false
  },
  "audio_setting": {
    "sample_rate": 8000,
    "bitrate": 32000,
    "format": "mp3",
    "channel": 1
  },
  "pronunciation_dict": {
    "tone": [
      "text"
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "Wise_Woman",
      "weight": 1
    }
  ],
  "stream": false,
  "language_boost": "Chinese",
  "voice_modify": {
    "pitch": 1,
    "intensity": 1,
    "timbre": 1,
    "sound_effects": "spacious_echo"
  },
  "subtitle_enable": false,
  "output_format": "hex"
}

201 Success
{
  "metadata": {
    "transaction_key": "text",
    "request_id": "text",
    "sha256": "text",
    "created": "2025-11-06T13:04:37.682Z",
    "duration": 1,
    "channels": 1,
    "models": [
      "text"
    ],
    "model_info": {
      "ANY_ADDITIONAL_PROPERTY": {
        "name": "text",
        "version": "text",
        "arch": "text"
      }
    }
  }
}