Speech 2.6 HD

This documentation is valid for the following model:

minimax/speech-2.6-hd

The model generates speech from text prompts, supports multiple voices, and is optimized for high-fidelity, natural-sounding output.

Set up your API Key

If you don’t have an API key for the AI/ML API yet, feel free to use our Quickstart guide.

Code Example

import os
import requests

def main():
    """Synthesize a short phrase with minimax/speech-2.6-hd and save it to disk.

    Sends a POST request to the AI/ML API text-to-speech endpoint and streams
    the response body into ``your_file_name.wav`` in the current working
    directory, then prints the absolute path of the saved file.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status. The
            output file is not created in that case, so an error payload is
            never saved as if it were audio.
        requests.Timeout: If the server does not respond within the timeout.
    """
    url = "https://api.aimlapi.com/v1/tts"
    headers = {
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    }
    payload = {
        "model": "minimax/speech-2.6-hd",
        "text": "Hi! What are you doing today?",
        "voice_setting": {
            "voice_id": "Wise_Woman",
        },
    }
    dist = os.path.abspath("your_file_name.wav")

    # The context manager releases the streamed connection even if writing
    # fails; the timeout keeps the script from hanging on a stalled request.
    with requests.post(
        url, headers=headers, json=payload, stream=True, timeout=120
    ) as response:
        # Fail loudly on an error status instead of writing the error body
        # into the audio file.
        response.raise_for_status()

        with open(dist, "wb") as write_stream:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    write_stream.write(chunk)

    print("Audio saved to:", dist)


if __name__ == "__main__":
    main()

import fs from "fs";
import path from "path";

/**
 * Synthesize a short phrase with minimax/speech-2.6-hd and save it to
 * "your_file_name.wav" in the current working directory.
 *
 * @returns {Promise<void>} Resolves once the audio file has been written.
 * @throws {Error} If the API responds with a non-2xx status; nothing is
 *   written to disk in that case.
 */
async function main() {
  const url = "https://api.aimlapi.com/v1/tts";
  const payload = {
    model: "minimax/speech-2.6-hd",
    text: "Hi! What are you doing today?",
    voice_setting: {
      voice_id: "Wise_Woman"
    }
  };

  const response = await fetch(url, {
    method: "POST",
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
      "Authorization": `Bearer <YOUR_AIMLAPI_KEY>`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  });

  // Fail loudly on an error status instead of saving the error body as audio.
  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `TTS request failed: ${response.status} ${response.statusText} ${details}`
    );
  }

  // Read response as ArrayBuffer and convert to Buffer
  const arrayBuffer = await response.arrayBuffer();
  const buffer = Buffer.from(arrayBuffer);

  // Save audio to file in the current working directory
  const dist = path.join(process.cwd(), "your_file_name.wav");
  fs.writeFileSync(dist, buffer);

  console.log("Audio saved to:", dist);
}

// Report failures explicitly and exit with a non-zero status.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Response

Audio saved to: c:\Users\user\Documents\Python Scripts\TTSes\your_file_name.wav

Generation time: ~ 5.8 s.

API Schema

post

Authorizations

Body

model · enum · Required

Possible values: minimax/speech-2.6-hd

text · string · min: 1 · max: 5000 · Required

The text content to be converted to speech.

stream · boolean · Optional

Enable streaming mode for real-time audio generation. When enabled, audio is generated and delivered in chunks as it's processed.

Default: false

language_boost · string · enum · Optional

Language recognition enhancement option.

Possible values:

subtitle_enable · boolean · Optional

Enable subtitle generation service. Only available for non-streaming requests. Generates timing information for the synthesized speech.

Default: false

output_format · string · enum · Optional

Format of the output content for non-streaming requests. Controls how the generated audio data is encoded in the response.

Default: hex

Possible values:

Responses

201 Success

application/json

post

/v1/tts

POST /v1/tts HTTP/1.1
Host: api.aimlapi.com
Authorization: Bearer YOUR_SECRET_TOKEN
Content-Type: application/json
Accept: */*
Content-Length: 509

{
  "model": "minimax/speech-2.6-hd",
  "text": "text",
  "voice_setting": {
    "voice_id": "Wise_Woman",
    "speed": 1,
    "vol": 1,
    "pitch": 0,
    "emotion": "happy",
    "text_normalization": false
  },
  "audio_setting": {
    "sample_rate": 8000,
    "bitrate": 32000,
    "format": "mp3",
    "channel": 1
  },
  "pronunciation_dict": {
    "tone": [
      "text"
    ]
  },
  "timbre_weights": [
    {
      "voice_id": "Wise_Woman",
      "weight": 1
    }
  ],
  "stream": false,
  "language_boost": "Chinese",
  "voice_modify": {
    "pitch": 1,
    "intensity": 1,
    "timbre": 1,
    "sound_effects": "spacious_echo"
  },
  "subtitle_enable": false,
  "output_format": "hex"
}

201 Success

{
  "metadata": {
    "transaction_key": "text",
    "request_id": "text",
    "sha256": "text",
    "created": "2025-11-06T13:04:37.682Z",
    "duration": 1,
    "channels": 1,
    "models": [
      "text"
    ],
    "model_info": {
      "ANY_ADDITIONAL_PROPERTY": {
        "name": "text",
        "version": "text",
        "arch": "text"
      }
    }
  }
}

Previous: Speech 2.6 Turbo · Next: Content Moderation Models

Last updated 12 hours ago

Was this helpful?