# Speech 2.8 HD

{% columns %}
{% column width="66.66666666666666%" %}
{% hint style="info" %}
This documentation is valid for the following list of our models:

* `minimax/speech-2.8-hd`
  {% endhint %}
  {% endcolumn %}

{% column width="33.33333333333334%" %} <a href="https://aimlapi.com/app/minimax/speech-2-8-hd" class="button primary">Try in Playground</a>
{% endcolumn %}
{% endcolumns %}

The model generates speech from text prompts in a choice of voices and is optimized for high-fidelity, natural-sounding output.

## Setup your API Key

If you don’t have an API key for the AI/ML API yet, feel free to use our [Quickstart guide](https://docs.aimlapi.com/quickstart/setting-up).

## API Schema

## POST /v1/tts

>

```json
{"openapi":"3.0.0","info":{"title":"AIML API","version":"1.0.0"},"servers":[{"url":"https://api.aimlapi.com"}],"paths":{"/v1/tts":{"post":{"operationId":"_v1_tts","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"object","properties":{"model":{"type":"string","enum":["minimax/speech-2.8-hd"]},"text":{"type":"string","minLength":1,"maxLength":5000,"description":"The text content to be converted to speech."},"voice_setting":{"type":"object","properties":{"voice_id":{"anyOf":[{"type":"string","enum":["Wise_Woman","Friendly_Person","Inspirational_girl","Deep_Voice_Man","Calm_Woman","Casual_Guy","Lively_Girl","Patient_Man","Young_Knight","Determined_Man","Lovely_Girl","Decent_Boy","Imposing_Manner","Elegant_Man","Abbess","Sweet_Girl_2","Exuberant_Girl"]},{"type":"string","minLength":1,"maxLength":64}],"default":"Wise_Woman","description":"A predefined system voice for text-to-speech synthesis."},"speed":{"type":"number","minimum":0.5,"maximum":2,"default":1,"description":"Adjusts the speed of the voice. A value of 1.0 is the default speed, while values less than 1.0 slow down the speech, and values greater than 1.0 speed it up."},"vol":{"type":"number","minimum":0.01,"maximum":10,"default":1,"description":"The volume of the generated speech. Range: (0, 10]. Larger values indicate larger volumes."},"pitch":{"type":"number","minimum":-12,"maximum":12,"default":0,"description":"The pitch of the generated speech. Range: [-12, 12]. 0 = default voice output."},"emotion":{"type":"string","enum":["happy","sad","angry","fearful","disgusted","surprised","neutral"],"description":"Emotional tone to apply to the synthesized speech. Controls the emotional expression of the generated voice output."},"text_normalization":{"type":"boolean","default":false,"description":"English text normalization support. Improves number-reading but increases latency."}},"default":{"voice_id":"Wise_Woman"},"description":"Voice settings overriding stored settings for the given voice. They are applied only on the given request."},"audio_setting":{"type":"object","properties":{"sample_rate":{"type":"integer","description":"Audio sample rate in Hz.","enum":[8000,16000,22050,24000,32000,44100]},"bitrate":{"type":"integer","description":"Audio bitrate in bits per second. Controls the compression level and audio quality. Higher bitrates provide better quality but larger file sizes.","enum":[32000,64000,128000,256000]},"format":{"type":"string","enum":["mp3","pcm","flac"],"default":"mp3","description":"Audio output format. MP3 provides good compression and compatibility, PCM offers uncompressed high quality, and FLAC provides lossless compression."},"channel":{"type":"integer","description":"Number of audio channels. 1 for mono (single channel), 2 for stereo (dual channel) output.","enum":[1,2]}},"description":"Audio output configuration"},"pronunciation_dict":{"type":"object","properties":{"tone":{"type":"array","items":{"type":"string"},"description":"Replacement of text and pronunciations. Format: [\"燕少飞/(yan4)(shao3)(fei1)\", \"达菲/(da2)(fei1)\", \"omg/oh my god\"]"}},"required":["tone"],"description":"Custom pronunciation dictionary for handling specific words or phrases. Allows fine-tuning of how certain text should be pronounced using phonetic representations."},"timbre_weights":{"type":"array","items":{"type":"object","properties":{"voice_id":{"anyOf":[{"type":"string","enum":["Wise_Woman","Friendly_Person","Inspirational_girl","Deep_Voice_Man","Calm_Woman","Casual_Guy","Lively_Girl","Patient_Man","Young_Knight","Determined_Man","Lovely_Girl","Decent_Boy","Imposing_Manner","Elegant_Man","Abbess","Sweet_Girl_2","Exuberant_Girl"]},{"type":"string","minLength":1,"maxLength":64}],"description":"A predefined system voice for text-to-speech synthesis."},"weight":{"type":"integer","minimum":1,"maximum":100,"description":"Weight for voice mixing. Range: [1, 100]. Higher weights are sampled more heavily."}},"required":["voice_id","weight"]},"maxItems":4,"description":"Voice mixing configuration allowing combination of up to 4 different voices with specified weights. Each voice contributes to the final output based on its weight value (1-100)."},"language_boost":{"type":"string","enum":["Chinese","Chinese,Yue","English","Arabic","Russian","Spanish","French","Portuguese","German","Turkish","Dutch","Ukrainian","Vietnamese","Indonesian","Japanese","Italian","Korean","Thai","Polish","Romanian","Greek","Czech","Finnish","Hindi","Bulgarian","Danish","Hebrew","Malay","Persian","Slovak","Swedish","Croatian","Filipino","Hungarian","Norwegian","Slovenian","Catalan","Nynorsk","Tamil","Afrikaans","auto"],"description":"Language recognition enhancement option."},"voice_modify":{"type":"object","properties":{"pitch":{"type":"integer","minimum":-100,"maximum":100,"description":"Pitch level (-100 to 100)"},"intensity":{"type":"integer","minimum":-100,"maximum":100,"description":"Intensity level (-100 to 100)"},"timbre":{"type":"integer","minimum":-100,"maximum":100,"description":"Timbre level (-100 to 100)"},"sound_effects":{"type":"string","enum":["spacious_echo","auditorium_echo","lofi_telephone","robotic"],"description":"Audio effects to apply to the synthesized speech. Includes options like spacious_echo, auditorium_echo, lofi_telephone, and robotic effects."}},"description":"Voice modification settings for adjusting pitch, intensity, timbre, and applying sound effects to customize the voice characteristics."},"subtitle_enable":{"type":"boolean","default":false,"description":"Enable subtitle generation service. Only available for non-streaming requests. Generates timing information for the synthesized speech."},"stream":{"type":"boolean","enum":[false],"default":false}},"required":["model","text"],"title":"minimax/speech-2.8-hd"}}}},"responses":{"200":{"content":{"application/json":{"schema":{"type":"object","properties":{"audio":{"type":"string","format":"uri"},"meta":{"type":"object","nullable":true,"properties":{"usage":{"type":"object","nullable":true,"properties":{"credits_used":{"type":"number","description":"The number of tokens consumed during generation."},"usd_spent":{"type":"number","description":"The total amount of money spent by the user in USD."}},"required":["credits_used","usd_spent"]}},"description":"Additional details about the generation."}},"required":["audio"]}},"audio/wav":{"schema":{"type":"string","format":"binary","description":"Audio stream"}}}}}}}}}
```

## Code Example

{% tabs %}
{% tab title="Python" %}
{% code overflow="wrap" %}

```python
import requests
import json  # for getting a structured output with indentation

def main():
    """Send a text-to-speech request to the AIML API and print the JSON reply."""
    url = "https://api.aimlapi.com/v1/tts"
    headers = {
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    }
    payload = {
        # Model ID documented on this page.
        "model": "minimax/speech-2.8-hd",
        "text": "Hi! What are you doing today?",
        "voice_setting": {
            "voice_id": "Wise_Woman"
        }
    }

    # json= serializes the payload and sets the Content-Type header.
    # The whole body is parsed right below, so a streamed download is not needed.
    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    print(json.dumps(data, indent=2, ensure_ascii=False))

main()
```

{% endcode %}
{% endtab %}

{% tab title="JavaScript" %}
{% code overflow="wrap" %}

```javascript
/**
 * Sends a text-to-speech request to the AIML API and prints the JSON response.
 */
async function main() {
  const url = "https://api.aimlapi.com/v1/tts";

  const payload = {
    // Model ID documented on this page.
    model: "minimax/speech-2.8-hd",
    text: "Hi! What are you doing today?",
    voice_setting: {
      voice_id: "Wise_Woman"
    }
  };

  const response = await fetch(url, {
    method: "POST",
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
      "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  });

  const data = await response.json();
  console.log(JSON.stringify(data, null, 2));
}

main().catch(console.error);
```

{% endcode %}
{% endtab %}
{% endtabs %}

<details>

<summary>Response</summary>

{% code overflow="wrap" %}

```json
{
  "audio": {
    "url": "https://cdn.aimlapi.com/moose/audio%2Ftts-20260417163722-puBlFtqrtDaehpIV.mp3?Expires=1776501443&OSSAccessKeyId=LTAI5tCpJNKCf5EkQHSuL9xg&Signature=0ejy8m4Ni3WOygPfWbPhWMi6%2B%2B4%3D"
  },
  "meta": {
    "usage": {
      "credits_used": 7541,
      "usd_spent": 0.0037705
    }
  }
}
```

{% endcode %}

</details>

Listen to the audio sample we generated (\~ 3 s):

{% embed url="<https://drive.google.com/file/d/1qCCVggnAz61LM1u-5ZuEleGrGIcxSX25/view?usp=sharing>" %}
