> For the complete documentation index, see [llms.txt](https://docs.aimlapi.com/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://docs.aimlapi.com/api-references/speech-models/text-to-speech/microsoft/mai-voice-2.md).

# MAI Voice 2

{% columns %}
{% column width="66.66666666666666%" %}
{% hint style="info" %}
This documentation is valid for the following list of our models:

* `microsoft/mai-voice-2`
  {% endhint %}
  {% endcolumn %}

{% column width="33.33333333333334%" %} <a href="https://aimlapi.com/app/microsoft/mai-voice-2" class="button primary">Try in Playground</a>
{% endcolumn %}
{% endcolumns %}

## Model Overview

MAI-Voice-2 is a text-to-speech model from Microsoft. It generates natural, expressive speech and supports multiple languages, voices, and audio formats.

## Setup your API Key

If you don't have an API key for the AI/ML API yet, feel free to use our [Quickstart guide](https://docs.aimlapi.com/quickstart/setting-up).

## API Schema

## POST /v1/tts

>

```json
{"openapi":"3.0.0","info":{"title":"AIML API","version":"1.0.0"},"servers":[{"url":"https://api.aimlapi.com"}],"paths":{"/v1/tts":{"post":{"operationId":"_v1_tts","requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"object","properties":{"model":{"type":"string","enum":["microsoft/mai-voice-2"]},"text":{"type":"string","minLength":1,"maxLength":4096,"description":"The text content to be converted to speech."},"voice":{"type":"string","enum":["en-US-AvaNeural","en-US-AndrewNeural","en-US-EmmaNeural","en-US-BrianNeural","en-US-JennyNeural","en-US-GuyNeural","en-US-AriaNeural","en-US-DavisNeural","en-US-JaneNeural","en-US-NancyNeural","en-US-TonyNeural","en-US-SaraNeural","en-US-Ava:DragonHDLatestNeural","en-US-Andrew:DragonHDLatestNeural","en-US-Emma:DragonHDLatestNeural","en-US-Brian:DragonHDLatestNeural","en-GB-SoniaNeural","en-GB-RyanNeural","fr-FR-DeniseNeural","de-DE-KatjaNeural","es-ES-ElviraNeural"],"default":"en-US-AvaNeural","description":"Name of the voice to be used."},"response_format":{"type":"string","enum":["mp3","wav","pcm"],"default":"mp3","description":"Format of the output content for non-streaming requests. Controls how the generated audio data is encoded in the response."},"max_tokens":{"type":"integer","minimum":1,"description":"The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API."},"temperature":{"type":"number","minimum":0,"maximum":2,"default":1,"description":"What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both."},"top_p":{"type":"number","minimum":0,"maximum":1,"default":1,"description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both."},"max_completion_tokens":{"type":"integer","minimum":1,"description":"An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens."},"stream":{"type":"boolean","enum":[false],"default":false}},"required":["model","text"],"title":"microsoft/mai-voice-2"}}}},"responses":{"200":{"content":{"application/json":{"schema":{"type":"object","properties":{"audio":{"type":"string","format":"uri","description":"The URL of the generated audio file."},"meta":{"type":"object","nullable":true,"properties":{"usage":{"type":"object","nullable":true,"properties":{"credits_used":{"type":"number","description":"The number of tokens consumed during generation."},"usd_spent":{"type":"number","description":"The total amount of money spent by the user in USD."}},"required":["credits_used","usd_spent"]}},"description":"Additional details about the generation."}},"required":["audio"]}}}}}}}}}
```

## Code Example

{% tabs %}
{% tab title="Python" %}
{% code overflow="wrap" %}

```python
import requests


def main():
    """Generate speech with microsoft/mai-voice-2 and save it as audio.mp3.

    Sends the text to the /v1/tts endpoint, reads the URL of the generated
    audio from the JSON response, then streams that file to disk.
    Errors are printed rather than raised.
    """
    url = "https://api.aimlapi.com/v1/tts"
    headers = {
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    }
    payload = {
        "model": "microsoft/mai-voice-2",
        "text": "Cities of the future promise to radically transform how people live, work, and move.",
        "voice": "en-US-AvaNeural",
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()

        # The API schema documents "audio" as a plain URL string; an object of
        # the form {"url": ...} is accepted as well so either shape works.
        audio = response.json()["audio"]
        audio_url = audio["url"] if isinstance(audio, dict) else audio

        # result = os.path.join(os.path.dirname(__file__), "audio.mp3")  # if you run this code as a .py file (also add `import os` at the top)
        result = "audio.mp3"  # if you run this code in Jupyter Notebook

        # The context manager releases the streamed connection even if
        # writing the file fails part-way through.
        with requests.get(audio_url, stream=True) as audio_response:
            audio_response.raise_for_status()

            with open(result, "wb") as write_stream:
                for chunk in audio_response.iter_content(chunk_size=8192):
                    if chunk:
                        write_stream.write(chunk)

        print("Audio saved to:", result)

    except requests.exceptions.RequestException as e:
        print(f"Error making request: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # The response was not JSON or did not contain an audio URL.
        print(f"Unexpected response format: {e!r}")


if __name__ == "__main__":
    main()
```

{% endcode %}
{% endtab %}

{% tab title="JS" %}
{% code overflow="wrap" %}

```javascript
const https = require("https");
const fs = require("fs");

// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
const apiKey = "<YOUR_AIMLAPI_KEY>";

const data = JSON.stringify({
  model: "microsoft/mai-voice-2",
  text: "Cities of the future promise to radically transform how people live, work, and move.",
  voice: "en-US-AvaNeural",
});

const options = {
  hostname: "api.aimlapi.com",
  path: "/v1/tts",
  method: "POST",
  headers: {
    "Authorization": `Bearer ${apiKey}`,
    "Content-Type": "application/json",
    "Content-Length": Buffer.byteLength(data),
  },
};

/**
 * Downloads the generated audio file and writes it to audio.mp3.
 * Failures are logged to stderr instead of crashing the process.
 *
 * @param {string} audioUrl - URL of the generated audio file.
 */
function downloadAudio(audioUrl) {
  https
    .get(audioUrl, (audioRes) => {
      // https.get does not follow redirects, so anything other than 200
      // would write a non-audio body into audio.mp3.
      if (audioRes.statusCode !== 200) {
        console.error(`Download failed with status ${audioRes.statusCode}`);
        audioRes.resume(); // discard the body so the socket is released
        return;
      }

      const file = fs.createWriteStream("audio.mp3");
      file.on("error", (e) => console.error("File write error:", e));
      file.on("finish", () => console.log("Audio saved to audio.mp3"));
      audioRes.pipe(file);
    })
    .on("error", (e) => console.error("Download error:", e));
}

const req = https.request(options, (res) => {
  // Decode as UTF-8 at the stream level so multi-byte characters that are
  // split across chunks are not corrupted.
  res.setEncoding("utf8");

  let body = "";
  res.on("data", (chunk) => {
    body += chunk;
  });
  res.on("end", () => {
    if (res.statusCode >= 400) {
      console.error(`Error ${res.statusCode}:`, body);
      return;
    }

    let audioUrl;
    try {
      // The API schema documents "audio" as a plain URL string; an object of
      // the form { url } is accepted as well so either shape works.
      const { audio } = JSON.parse(body);
      audioUrl = typeof audio === "string" ? audio : audio?.url;
    } catch (e) {
      console.error("Could not parse API response:", e);
      return;
    }

    if (typeof audioUrl !== "string") {
      console.error("No audio URL in API response:", body);
      return;
    }

    downloadAudio(audioUrl);
  });
});

req.on("error", (e) => console.error("Request error:", e));
req.write(data);
req.end();
```

{% endcode %}
{% endtab %}
{% endtabs %}

<details>

<summary>Response</summary>

```
Audio saved to: audio.mp3
```

</details>


---

# Agent Instructions
This documentation is published with GitBook. GitBook is the documentation platform designed so that both humans and AI agents can read, navigate, and reason over technical content effectively. Learn more at gitbook.com.

## Querying This Documentation
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter, and the optional `goal` query parameter:

```
GET https://docs.aimlapi.com/api-references/speech-models/text-to-speech/microsoft/mai-voice-2.md?ask=<question>&goal=<endgoal>
```

`ask` is the immediate question: it should be specific, self-contained, and written in natural language.
`goal` is optional and describes the broader end goal you are ultimately trying to accomplish on behalf of the user. GitBook uses it to tailor the answer towards what is most useful for that goal.

The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.