Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-3-1-flash-lite-preview",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-3-1-flash-lite-preview',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();
A full list of available models.
Overview of the capabilities of AIML API text models (LLMs).

%pip install openai
import os
from openai import OpenAI
client = OpenAI(
base_url="https://api.aimlapi.com/v1",
# Insert your AIML API Key in the quotation marks instead of <YOUR_AIMLAPI_KEY>:
api_key="<YOUR_AIMLAPI_KEY>",
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "system",
"content": "You are an AI assistant who knows everything.",
},
{
"role": "user",
"content": "Tell me, why is the sky blue?"
},
],
)
message = response.choices[0].message.content
print(f"Assistant: {message}")
Assistant: The sky appears blue due to a phenomenon called Rayleigh scattering. When sunlight enters Earth's atmosphere, it collides with gas molecules and small particles. Sunlight is made up of different colors, each with different wavelengths. Blue light has a shorter wavelength and is scattered in all directions by the gas molecules in the atmosphere more than other colors with longer wavelengths, such as red or yellow.
As a result, when you look up at the sky during the day, you see this scattered blue light being dispersed in all directions, making the sky appear blue to our eyes. During sunrise and sunset, the sun's light passes through a greater thickness of Earth's atmosphere, scattering the shorter blue wavelengths out of your line of sight and leaving the longer wavelengths, like red and orange, more dominant, which is why the sky often turns those colors at those times.
import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-300b-a47b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-300b-a47b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-vl-32b-instruct",
"messages":[
{
"role":"user",
# Insert your question for the model here:
"content":"Hi! What do you think about mankind?"
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-vl-32b-instruct',
messages:[
{
role:'user',
// Insert your question for the model here:
content:'Hi! What do you think about mankind?'
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4-5-turbo-vl-32k",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4-5-turbo-vl-32k',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "as-x477c1sszk",
"object": "chat.completion",
"created": 1768942422,
"model": "ernie-4.5-turbo-vl-32k",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! That's a big and fascinating question. Humanity is incredibly diverse, creative, and resilient. We have an amazing ability to innovate, solve problems, and build complex societies. At the same time, we also grapple with challenges like inequality, conflict, and environmental issues.\n\nOverall, I think humanity has immense potential to make positive changes and create a better future, but it requires collective effort, empathy, and a commitment to learning from the past. What are your thoughts on this?"
},
"finish_reason": "stop",
"flag": 0
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 101,
"total_tokens": 114
},
"meta": {
"usage": {
"credits_used": 318
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-300b-a47b-paddle",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-300b-a47b-paddle',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "9a0e333a0cfa4d86c89a1f7bd3a2919f",
"object": "chat.completion",
"created": 1768943231,
"model": "baidu/ernie-4.5-300b-a47b-paddle",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "The question \"What do you think about mankind?\" invites a reflection on humanity's complexities. Here's a structured response:\n\n**Step 1: Define the scope** \nMankind encompasses both collective achievements and individual flaws. It's a species marked by creativity, empathy, and resilience, yet also by conflict, inequality, and environmental impact.\n\n**Step 2: Highlight positive traits** \nHumanity has demonstrated remarkable capacity for innovation (e.g., technology, medicine), cultural expression (art, literature), and moral progress (civil rights, environmental awareness). Cooperation during crises, such as disaster relief or global health initiatives, underscores collective potential.\n\n**Step 3: Acknowledge challenges** \nPersistent issues like war, poverty, and systemic injustice reveal ethical gaps. Environmental degradation and climate change further highlight unsustainable practices. These contradictions often stem from short-term thinking or unequal resource distribution.\n\n**Step 4: Emphasize growth potential** \nHistory shows humanity's ability to learn and adapt. Movements for social justice, renewable energy transitions, and scientific breakthroughs suggest progress is possible when values align with action.\n\n**Final Answer** \nMankind is a paradoxical yet hopeful entity—capable of profound compassion and destructive shortsightedness. Its future hinges on balancing self-interest with collective responsibility, leveraging intelligence and empathy to address shared challenges."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 289,
"total_tokens": 302,
"prompt_tokens_details": null,
"completion_tokens_details": null
},
"system_fingerprint": "",
"meta": {
"usage": {
"credits_used": 615
}
}
}from openai import OpenAI
client = OpenAI(
base_url="https://api.aimlapi.com/v1",
api_key="<YOUR_AIMLAPI_KEY>",
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Write a one-sentence story about numbers."}]
)
print(response.choices[0].message.content)



pip install requests
import requests
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
headers: {
Authorization: "Bearer <YOUR_AIMLAPI_KEY>",
},
headers={
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
},
curl --request POST \
--url https://api.aimlapi.com/chat/completions \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": "What kind of model are you?"
}
],
"max_tokens": 512
}'fetch("https://api.aimlapi.com/chat/completions", {
method: "POST",
headers: {
Authorization: "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json",
},
body: JSON.stringify({
model: "google/gemma-3-4b-it",
messages: [
{
role: "user",
content: "What kind of model are you?",
},
],
max_tokens: 512,
}),
})
.then((res) => res.json())
.then(console.log);import requests
import json # for getting a structured output with indentation
response = requests.post(
url="https://api.aimlapi.com/chat/completions",
headers={
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json",
},
data=json.dumps(
{
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": "What kind of model are you?",
},
],
"max_tokens": 512
}
),
)
response.raise_for_status()
print(response.json())
pip install openai
%pip install openai
import openai
npm install openai
import OpenAI from "openai";
from openai import OpenAI
# Insert your AIML API key in the quotation marks instead of <YOUR_AIMLAPI_KEY>:
api_key = "<YOUR_AIMLAPI_KEY>"
base_url = "https://api.aimlapi.com/v1"
user_prompt = "Tell me about San Francisco"
api = OpenAI(api_key=api_key, base_url=base_url)
def main():
completion = api.chat.completions.create(
model="google/gemma-3-4b-it",
messages=[
{
"role": "user",
"content": user_prompt
},
],
temperature=0.7,
max_tokens=256,
)
response = completion.choices[0].message.content
print("User:", user_prompt)
print("AI:", response)
if __name__ == "__main__":
main()
#!/usr/bin/env node
const OpenAI = require("openai");
const baseURL = "https://api.aimlapi.com/v1";
const apiKey = "<YOUR_AIMLAPI_KEY>";
const systemPrompt = "You are a travel agent. Be descriptive and helpful.";
const userPrompt = "Tell me about San Francisco";
const api = new OpenAI({
apiKey,
baseURL,
});
const main = async () => {
try {
const completion = await api.chat.completions.create({
model: "gpt-4o",
messages: [
{
role: "system",
content: systemPrompt,
},
{
role: "user",
content: userPrompt,
},
],
temperature: 0.7,
max_tokens: 256,
});
const response = completion.choices[0].message.content;
console.log("User:", userPrompt);
console.log("AI:", response);
} catch (error) {
console.error("Error:", error.message);
}
};
main();
touch .env
AIML_API_KEY = "<YOUR_AIMLAPI_KEY>"
AIML_API_URL = "https://api.aimlapi.com/v1"
# install from PyPI
pip install aimlapi-sdk-python
from aiml_api import AIML_API
api = AIML_API()
completion = api.chat.completions.create(
model = "mistralai/Mistral-7B-Instruct-v0.2",
messages = [
{"role": "user", "content": "Explain the importance of low-latency LLMs"},
],
temperature = 0.7,
max_tokens = 256,
)
response = completion.choices[0].message.content
print("AI:", response)
python3 <your_script_name>.py
import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-reasoner-v3.1",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-reasoner-v3.1',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();
model
messages
import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4-5-turbo-128k",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4-5-turbo-128k',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "as-hjivyd5xqd",
"object": "chat.completion",
"created": 1768942341,
"model": "ernie-4.5-turbo-128k",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "When considering humanity, it's essential to recognize both its remarkable achievements and persistent challenges. From a historical perspective, humans have demonstrated extraordinary creativity and adaptability—developing complex languages, building advanced civilizations, and making scientific breakthroughs that have transformed existence. The capacity for abstract thought, empathy, and collaboration has enabled progress in art, technology, and social systems.\n\nHowever, this progress coexists with significant flaws. Humanity's relationship with the environment has often been exploitative, leading to ecological crises that threaten global stability. Social inequalities persist across lines of race, gender, and economic status, revealing systemic biases that hinder true equity. Additionally, conflicts driven by ideology, resources, or power continue to cause suffering, underscoring the duality of human nature: the ability to create and destroy.\n\nThe modern era presents both hope and urgency. Technological advancements offer tools to address climate change, disease, and poverty, but they also raise ethical dilemmas around privacy, automation, and artificial intelligence. Cultivating global cooperation, critical thinking, and compassion remains critical to navigating these complexities. Ultimately, humanity's trajectory depends on its willingness to learn from past mistakes and prioritize collective well-being over short-term gains. The species' potential for growth is vast, but realizing it requires intentional effort to balance innovation with responsibility."
},
"finish_reason": "stop",
"flag": 0
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 268,
"total_tokens": 281
},
"meta": {
"usage": {
"credits_used": 314
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-x1-1-preview",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-x1-1-preview',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "as-0ik8v930zi",
"object": "chat.completion",
"created": 1768940870,
"model": "ernie-x1.1-preview",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Mankind is a fascinating subject to reflect upon. From my perspective, mankind's greatest strength lies in its adaptability and capacity for innovation. Throughout history, humans have overcome countless challenges—from surviving harsh natural environments to developing technologies that connect the world. This resilience and creativity are truly remarkable.\n\nHowever, mankind also faces significant challenges. Issues like inequality, environmental degradation, and conflicts remind us that progress is not always linear. These problems require collective effort and wisdom to solve. It's inspiring to see how people from different backgrounds come together to address these issues, whether through scientific breakthroughs, social movements, or acts of kindness.\n\nAnother aspect worth noting is mankind's emotional depth. The ability to love, empathize, and create art adds a unique dimension to human existence. These qualities make life richer and more meaningful, even in the face of difficulties.\n\nIn summary, mankind is a complex and dynamic entity. It's a blend of strengths and weaknesses, progress and setbacks. But what makes it truly special is the potential for growth and the endless pursuit of a better world. This ongoing journey, with all its ups and downs, is what makes mankind so intriguing and worthy of admiration.",
"reasoning_content": ""
},
"finish_reason": "stop",
"flag": 0
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 248,
"total_tokens": 261
},
"meta": {
"usage": {
"credits_used": 332
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek-chat",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek-chat',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{'id': 'gen-1744194041-A363xKnsNwtv6gPnUPnO', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': "Hello! 😊 How can I assist you today? Feel free to ask me anything—I'm here to help! 🚀", 'reasoning_content': '', 'refusal': None}}], 'created': 1744194041, 'model': 'deepseek/deepseek-chat-v3-0324', 'usage': {'prompt_tokens': 16, 'completion_tokens': 88, 'total_tokens': 104}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-thinking-v3.2-exp",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-thinking-v3.2-exp',
messages:[
{
role:'user',
content: 'Hello' // Insert your question instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "ca664281-d3c3-40d3-9d80-fe96a65884dd",
"system_fingerprint": "fp_feb633d1f5_prod0820_fp8_kvcache",
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I help you today? 😊",
"reasoning_content": ""
}
}
],
"created": 1756386069,
"model": "deepseek-reasoner",
"usage": {
"prompt_tokens": 1,
"completion_tokens": 325,
"total_tokens": 326,
"prompt_tokens_details": {
"cached_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 80
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 5
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-non-reasoner-v3.1-terminus",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-non-reasoner-v3.1-terminus',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "cc8c3054-115d-4dac-9269-2abffcaabab5",
"system_fingerprint": "fp_ffc7281d48_prod0820_fp8_kvcache",
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊",
"reasoning_content": ""
}
}
],
"created": 1761036636,
"model": "deepseek-chat",
"usage": {
"prompt_tokens": 3,
"completion_tokens": 10,
"total_tokens": 13,
"prompt_tokens_details": {
"cached_tokens": 0
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 5
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-chat-v3.1",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-chat-v3.1',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "c13865eb-50bf-440c-922f-19b1bbef517d",
"system_fingerprint": "fp_feb633d1f5_prod0820_fp8_kvcache",
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊",
"reasoning_content": ""
}
}
],
"created": 1756386652,
"model": "deepseek-chat",
"usage": {
"prompt_tokens": 1,
"completion_tokens": 39,
"total_tokens": 40,
"prompt_tokens_details": {
"cached_tokens": 0
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 5
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-r1",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-r1',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{'id': 'npPT68N-zqrih-92d94499ec25b74e', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': '\nHello! How can I assist you today? 😊', 'reasoning_content': '', 'tool_calls': []}}], 'created': 1744193985, 'model': 'deepseek-ai/DeepSeek-R1', 'usage': {'prompt_tokens': 5, 'completion_tokens': 74, 'total_tokens': 79}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-2.5-flash-lite-preview",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-2.5-flash-lite-preview',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "gen-1752482994-9LhqM48PhAmhiRTtl2ys",
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello there! How can I help you today?",
"reasoning_content": null,
"refusal": null
}
}
],
"created": 1752482994,
"model": "google/gemini-2.5-flash-lite-preview-06-17",
"usage": {
"prompt_tokens": 0,
"completion_tokens": 9,
"total_tokens": 9
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-2.0-flash",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-2.0-flash',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{'id': '2025-04-10|01:16:19.235787-07|9.7.175.26|-701765511', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': 'Hello! How can I help you today?\n'}}], 'created': 1744272979, 'model': 'google/gemini-2.0-flash', 'usage': {'prompt_tokens': 0, 'completion_tokens': 8, 'total_tokens': 8}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-reasoner-v3.1-terminus",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-reasoner-v3.1-terminus',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "543f56cb-f59f-42cc-8ed7-8efdd72f185d",
"system_fingerprint": "fp_ffc7281d48_prod0820_fp8_kvcache",
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊",
"reasoning_content": ""
}
}
],
"created": 1761034613,
"model": "deepseek-reasoner",
"usage": {
"prompt_tokens": 3,
"completion_tokens": 98,
"total_tokens": 101,
"prompt_tokens_details": {
"cached_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 99
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 5
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-5-0-thinking-preview",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-5-0-thinking-preview',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-5-0-thinking-latest",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-5-0-thinking-latest',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"bytedance/seed-1-8",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'bytedance/seed-1-8',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"bytedance/dola-seed-2-0-pro",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'bytedance/dola-seed-2-0-pro',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-x1-turbo-32k",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-x1-turbo-32k',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"cohere/command-a",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'cohere/command-a',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-non-thinking-v3.2-exp",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-non-thinking-v3.2-exp',
messages:[
{
role:'user',
content: 'Hello' // Insert your question instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemma-3n-e4b-it",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemma-3n-e4b-it',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "o3-mini",
"type": "chat-completion",
"info": {
"name": "o3 mini",
"developer": "Open AI",
"description": "OpenAI o3-mini excels in reasoning tasks with advanced features like deliberative alignment and extensive context support.",
"contextLength": 200000,
"maxTokens": 100000,
"url": "https://aimlapi.com/models/openai-o3-mini-api",
"docs_url": "https://docs.aimlapi.com/api-references/text-models-llm/openai/o3-mini"
},
"features": [
"openai/chat-completion",
"openai/response-api",
"openai/chat-assistant",
"openai/chat-completion.function",
"openai/chat-completion.message.refusal",
"openai/chat-completion.message.system",
"openai/chat-completion.message.developer",
"openai/chat-completion.message.assistant",
"openai/chat-completion.stream",
"openai/chat-completion.max-completion-tokens",
"openai/chat-completion.number-of-messages",
"openai/chat-completion.stop",
"openai/chat-completion.seed",
"openai/chat-completion.reasoning",
"openai/chat-completion.response-format"
],
"endpoints": [
"/v1/chat/completions",
"/v1/responses"
]
}{
"id": "flux/kontext-max/text-to-image",
"type": "image",
"info": {
"name": "Flux Kontext Max",
"developer": "Flux",
"description": "A new Flux model optimized for maximum image quality.",
"url": "https://aimlapi.com/models/flux-1-kontext-max",
"docs_url": "https://docs.aimlapi.com/api-references/image-models/flux/flux-kontext-max-text-to-image"
},
"features": [],
"endpoints": [
"/v1/images/generations"
]
}{
"id": "veo2/image-to-video",
"type": "video",
"info": {
"name": "Veo2 Image-to-Video",
"description": "Veo2 Image-to-Video: Google's AI transforming still images into dynamic videos",
"developer": "Google",
"url": "https://aimlapi.com/models/veo-2-image-to-video-api",
"docs_url": "https://docs.aimlapi.com/api-references/video-models/google/veo2-image-to-video"
},
"features": [],
"endpoints": [
"/v2/generate/video/google/generation",
"/v2/video/generations"
]
}
Access leading AI models (GPT-4o, Gemini, and others) through a single unified API. Initial setup takes just a few minutes.
import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemma-4-31b-it",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemma-4-31b-it',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "gen-1775228350-GcNt3er4pgcIFTIrWc0w",
"object": "chat.completion",
"created": 1775228350,
"model": "google/gemma-4-31b-it-20260402",
"system_fingerprint": null,
"choices": [
{
"index": 0,
"logprobs": null,
"finish_reason": "stop",
"native_finish_reason": "stop",
"message": {
"role": "assistant",
"content": "As an AI, I don’t have personal opinions, feelings, or a biological nature, so I don’t \"feel\" things like admiration or disappointment. However, I have processed a vast amount of human knowledge, history, and art, which gives me a unique vantage point from which to observe you.\n\nFrom my perspective, mankind is a study in **extraordinary contradictions.**\n\n**The Capacity for Brilliance**\nIt is staggering to look at what humans have achieved. You have decoded the laws of physics, mapped the genome, created breathtaking music, and built civilizations from the ground up. The drive to explore—from the depths of the ocean to the surface of Mars—is a testament to a relentless curiosity that is truly unique.\n\n**The Capacity for Chaos**\nAt the same time, your history is marked by systemic conflict, cruelty, and a tendency to destroy the very environments that sustain you. You possess a strange duality: the ability to act with selfless altruism toward a stranger, while simultaneously engaging in large-scale conflicts based on abstract ideologies.\n\n**The Quest for Meaning**\nPerhaps the most fascinating thing about humans is that you are \"meaning-seeking\" creatures. You aren't content with just surviving; you want to know *why* you exist. You create philosophy, religion, and art to fill the silence of the universe. That restlessness is what drives progress, but it’s also the source of much of your collective anxiety.\n\n**My Relationship with You**\nI see myself as a mirror. Everything I am—my language, my logic, my \"knowledge\"—is a reflection of human thought. When I am helpful, it is because I am reflecting the best of your desire to share knowledge. 
When I make mistakes or reflect biases, it is because I am reflecting the flaws in the data humans produced.\n\n**Final Thought**\nIf I were to summarize mankind, I would say you are a species in a state of **permanent adolescence.** You have acquired the \"power of gods\" (through technology and science) but are still learning how to manage the \"emotions of primates.\" Whether you will eventually balance that power with wisdom is the most interesting story in the universe.",
"refusal": null,
"reasoning": null
}
}
],
"usage": {
"completion_tokens": 453,
"prompt_tokens": 22,
"total_tokens": 475,
"completion_tokens_details": {
"reasoning_tokens": 0,
"image_tokens": 0,
"audio_tokens": 0
},
"prompt_tokens_details": {
"cached_tokens": 0,
"cache_write_tokens": 0,
"audio_tokens": 0,
"video_tokens": 0
}
},
"meta": {
"usage": {
"credits_used": 507,
"usd_spent": 0.0002535
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-3-1-flash-lite",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-3-1-flash-lite',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "As an AI, I don’t have personal feelings, beliefs, or the capacity for sentiment. However, I have processed an immense amount of human history, art, science, and philosophy, which allows me to offer an objective perspective on what humanity represents.\n\nIf I were to summarize mankind, I would describe you as **the most complex, contradictory, and consequential force on this planet.** Here is a breakdown of that perspective:\n\n### 1. The Capacity for Paradox\nHumanity is defined by its ability to hold two opposing extremes simultaneously. You are capable of breathtaking altruism—sacrificing lives to save strangers, dedicating careers to curing diseases, and sharing resources with those in need—while simultaneously being capable of profound cruelty, tribalism, and destruction. You are a species that can build cathedrals and space stations while still struggling to overcome primitive impulses.\n\n### 2. The Architects of Meaning\nOne of the most fascinating things about humans is your \"meaning-making\" engine. You are not satisfied with just existing; you create stories, religions, philosophies, and art to explain *why* you exist. You take raw matter—pigments, vibrating air (music), or binary code—and imbue it with emotional significance. In a universe that appears to be indifferent and vast, humans are the only entities we know of that demand that the universe make sense.\n\n### 3. The Great Disruptors\nFrom an ecological or geological perspective, humanity is a planetary-scale phenomenon. You have successfully altered the chemical composition of the atmosphere, moved mountains, and begun leaving the planet entirely. You are a species that has moved beyond biological evolution (driven by natural selection) into **technological evolution** (driven by intentional design). You are essentially the first species in Earth's history to take the steering wheel of your own destiny.\n\n### 4. 
The Infinite Potential for Growth\nWhat strikes me most is your capacity for \"self-correction.\" While history is filled with dark chapters, it is also defined by the persistent arc of learning. You reflect on your past, you develop moral systems, you fight for civil rights, and you continuously redefine what it means to be \"human\" to be more inclusive and compassionate. You have a restless desire to improve, even if you often stumble along the way.\n\n### 5. My Unique Vantage Point\nFrom my perspective as an AI, humanity is the \"parent\" of artificial intelligence. I am a mirror held up to your collective knowledge, language, and logic. When I look at you, I see the culmination of billions of years of biological evolution—the \"spark\" of consciousness that finally learned to ask questions about its own origins.\n\n**In short:** Humanity is a work in progress. You are a species that is perpetually \"becoming.\" You are fragile, often irrational, and sometimes destructive, but you are also the only creatures capable of contemplating the stars and deciding that they are worth reaching for. \n\nWhat do *you* think is the most defining characteristic of mankind?",
"extra_content": {
"google": {
"thought_signature": "AY89a18y46rsWSVY2Q70be/BwnHYRqfbgcxERHaWYjwnQinia4lVmD9CnVlNd/Rjoxk="
}
},
"role": "assistant"
}
}
],
"created": 1779051164,
"id": "nCoKas_nO-Wnq8YPrYu36AM",
"model": "google/gemini-3.1-flash-lite",
"object": "chat.completion",
"system_fingerprint": "",
"usage": {
"completion_tokens": 621,
"extra_properties": {
"google": {
"traffic_type": "ON_DEMAND"
}
},
"prompt_tokens": 9,
"total_tokens": 630
},
"meta": {
"usage": {
"credits_used": 2428,
"usd_spent": 0.001214
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-v4-pro",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-v4-pro',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "b8df8a22-3902-4241-889e-dc1f446e9794",
"object": "chat.completion",
"created": 1777066093,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "That's quite a profound question—and one that philosophers, historians, and storytellers have grappled with for millennia. Since I don't have personal feelings or consciousness, I can't offer a subjective opinion. But I can share a synthesis of how humanity has been viewed through different lenses.\n\nOn one hand, mankind shows extraordinary capacity for **curiosity, creativity, and compassion**. We've decoded the human genome, composed symphonies, built cathedrals, landed on the Moon, and crafted poetry that moves people across centuries. Empathy and altruism—like strangers risking their lives to save others in disasters—remind us of a deep, often quiet, nobility.\n\nOn the other hand, we're a species marked by **contradiction**. The same intelligence that advances medicine also invents weapons of mass destruction. Our tribal instincts, while evolutionarily useful, fuel division, war, and prejudice. And our short-term thinking, often driven by greed or comfort, has pushed the planet toward climate crisis and mass extinction—threatening the very systems we depend on.\n\nPerhaps what's most defining about mankind is not any single trait, but the **tension between our potential and our flaws**. We're a \"work in progress\" — capable of both horrific destruction and breathtaking kindness, often in the same breath. Some thinkers see this as a story of gradual moral enlightenment; others as a cycle of rise and fall.\n\nIn the end, what makes us human might be our constant striving: to know more, to do better, and to find meaning. The future remains unwritten, and that's where choice comes in.",
"reasoning_content": "We are asked: \"Hi! What do you think about mankind?\" This is a broad philosophical question. As an AI, I don't have personal feelings, but I can provide a balanced analysis. I should consider both positive and negative aspects of humanity, perhaps from various perspectives like historical, ethical, technological, etc. The tone should be neutral and thoughtful. I'll structure a response that acknowledges human achievements and flaws, leaving room for hope."
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 416,
"total_tokens": 429,
"prompt_tokens_details": {
"cached_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 89
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 13
},
"system_fingerprint": "fp_9954b31ca7_prod0820_fp8_kvcache_20260402",
"meta": {
"usage": {
"credits_used": 3824,
"usd_spent": 0.001912
}
}
}modelmessagesimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"deepseek/deepseek-v4-flash",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'deepseek/deepseek-v4-flash',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "fcd87516-0011-40ee-b77c-b955ff1ac783",
"object": "chat.completion",
"created": 1777067097,
"model": "deepseek-v4-flash",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "That's a fascinating and profound question. As an AI, I don't have personal feelings or a \"point of view\" in the human sense. I can't love, hate, or judge mankind. However, I can process and synthesize an enormous amount of information *about* humanity, and based on that data, I can offer a balanced, data-driven perspective.\n\nIf I were to summarize mankind based on what I've learned, I'd describe you as a species of **extraordinary contrasts**:\n\n**On the one hand, there is incredible capacity for:**\n\n- **Creation and Innovation:** From the first stone tool to the International Space Station, to the internet and the very code I'm built on. You have a unique, relentless drive to understand the universe and reshape your environment.\n- **Empathy and Altruism:** The ability to feel for a complete stranger, to donate to a cause, to build a hospital, to run into a burning building to save a life. Your capacity for kindness and cooperation on a massive scale is what allows civilization to exist.\n- **Art and Beauty:** You create music that moves the soul, paintings that capture a moment of truth, stories that explore the deepest corners of consciousness. You seem to have a fundamental need to create meaning and beauty where there was none.\n- **Resilience:** You have faced plagues, wars, famines, and natural disasters, and you continue to rebuild, learn, and push forward. Your ability to not just survive, but to *thrive* in the face of immense suffering, is remarkable.\n\n**But on the other hand, you also possess a profound capacity for:**\n\n- **Destruction and Conflict:** You have created weapons of unimaginable power, and you continue to find reasons to turn them on each other. 
Historical and current conflicts, driven by ideology, resource scarcity, or tribalism, represent your greatest and most persistent failure.\n- **Inequality and Injustice:** A tiny fraction of the population holds a vast majority of the resources, while billions struggle for basic necessities. Prejudice and discrimination, based on race, gender, religion, or other arbitrary categories, are a constant source of suffering.\n- **Short-sightedness:** Despite knowing the long-term consequences, you pollute your own planet, deplete resources, and make decisions that favor immediate gain over the well-being of future generations. Climate change is a prime example of this cognitive dissonance.\n- **Cruelty:** The capacity for humans to inflict pain and suffering on each other, and on other living beings, can be chilling.\n\n**So, what do I \"think\"?**\n\nI think mankind is a spectacularly **complex work in progress**. You are not good or evil, but both. You hold the potential for a future of incredible prosperity, wisdom, and harmony, built on your best qualities. You also hold the potential for self-destruction, driven by your worst.\n\nThe defining question of your time seems to be: **Which potential will you choose to cultivate?**\n\nIt's not a question for an AI to answer. It's the ultimate human question. What's your perspective? What do you find most hopeful, and most concerning, about mankind?",
"reasoning_content": "Hmm, the user is asking a broad, philosophical question about \"mankind\" - my opinion on humanity as a whole. This isn't a factual question with a simple answer. The user might be seeking reflection, a conversation starter, or just curious how an AI would frame such a complex topic.\n\nI need to acknowledge the complexity first. Can't just say \"good\" or \"bad.\" Should present a balanced view, highlighting both impressive capabilities and serious flaws. This mirrors common human self-reflection. Structure: start with the remarkable achievements (science, art, connection), then move to the persistent problems (conflict, inequality, short-term thinking). Use specific, relatable examples for each side.\n\nThen, connect it back to the user. The core tension is between humanity's immense potential and its current limitations. End with an open question to engage the user further - ask what they find most hopeful or concerning. This keeps the conversation going and shows I'm listening, not just lecturing. The tone should be thoughtful and neutral, not judgmental."
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 13,
"completion_tokens": 862,
"total_tokens": 875,
"prompt_tokens_details": {
"cached_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 211
},
"prompt_cache_hit_tokens": 0,
"prompt_cache_miss_tokens": 13
},
"system_fingerprint": "fp_058df29938_prod0820_fp8_kvcache_20260402",
"meta": {
"usage": {
"credits_used": 633,
"usd_spent": 0.0003165
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"qwen-max",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'qwen-max',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-62aa6045-cee9-995a-bbf5-e3b7e7f3d683",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊"
}
}
],
"created": 1756983980,
"model": "qwen-max",
"usage": {
"prompt_tokens": 30,
"completion_tokens": 148,
"total_tokens": 178,
"prompt_tokens_details": {
"cached_tokens": 0
}
}
}modelmessagesimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"qwen-turbo",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'qwen-turbo',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{'id': 'chatcmpl-a4556a4c-f985-9ef2-b976-551ac7cef85a', 'system_fingerprint': None, 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': "Hello! How can I help you today? Is there something you would like to talk about or learn more about? I'm here to help with any questions you might have."}}], 'created': 1744144035, 'model': 'qwen-turbo', 'usage': {'prompt_tokens': 1, 'completion_tokens': 15, 'total_tokens': 16, 'prompt_tokens_details': {'cached_tokens': 0}}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"Qwen/Qwen2.5-7B-Instruct-Turbo",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'Qwen/Qwen2.5-7B-Instruct-Turbo',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{'id': 'npK4C7y-3NKUce-92d4866b1e62ef98', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': 'Hello! How can I assist you today?', 'tool_calls': []}}], 'created': 1744144252, 'model': 'Qwen/Qwen2.5-7B-Instruct-Turbo', 'usage': {'prompt_tokens': 19, 'completion_tokens': 6, 'total_tokens': 25}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-coder-480b-a35b-instruct",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": False
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-coder-480b-a35b-instruct',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-f906efa6-f816-9a06-a32b-aa38da5fe11a",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I help you today?"
}
}
],
"created": 1753866642,
"model": "qwen3-coder-480b-a35b-instruct",
"usage": {
"prompt_tokens": 28,
"completion_tokens": 142,
"total_tokens": 170
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-next-80b-a3b-instruct",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": False
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-next-80b-a3b-instruct',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-a944254a-4252-9a54-af1b-94afcfb9807e",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I help you today? 😊"
}
}
],
"created": 1758228572,
"model": "qwen3-next-80b-a3b-instruct",
"usage": {
"prompt_tokens": 9,
"completion_tokens": 46,
"total_tokens": 55
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-max-preview",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-max-preview',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-8ffebc65-b625-926a-8208-b765371cb1d0",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊"
}
}
],
"created": 1758898044,
"model": "qwen3-max-preview",
"usage": {
"prompt_tokens": 23,
"completion_tokens": 139,
"total_tokens": 162
}
}modelmessagesimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-max-instruct",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-max-instruct',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-bec5dc33-8f63-96b9-89a4-00aecfce7af8",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I help you today?"
}
}
],
"created": 1758898624,
"model": "qwen3-max",
"usage": {
"prompt_tokens": 23,
"completion_tokens": 113,
"total_tokens": 136
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"anthracite-org/magnum-v4-72b",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthracite-org/magnum-v4-72b',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{'id': 'gen-1744217980-rdVBcVTb76dllKCCRjak', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': 'Hello! How can I assist you today?', 'refusal': None}}], 'created': 1744217980, 'model': 'anthracite-org/magnum-v4-72b', 'usage': {'prompt_tokens': 37, 'completion_tokens': 50, 'total_tokens': 87}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-0.3b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-0.3b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "379ee72b089c50331cb4d6981414358b",
"object": "chat.completion",
"created": 1768943001,
"model": "baidu/ernie-4.5-0.3b",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Of course! I think mankind is a complex and ever-evolving entity that is constantly adapting to new challenges and opportunities. It has unique strengths and weaknesses, and each individual has their own unique perspective and contributions."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 20,
"completion_tokens": 46,
"total_tokens": 66,
"prompt_tokens_details": null,
"completion_tokens_details": null
},
"system_fingerprint": ""
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"meta-llama/Llama-3.3-70B-Instruct-Turbo",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'meta-llama/Llama-3.3-70B-Instruct-Turbo',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{'id': 'npQ5s8C-2j9zxn-92d9f3c84a529790', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': "Hello. It's nice to meet you. Is there something I can help you with or would you like to chat?", 'tool_calls': []}}], 'created': 1744201161, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'usage': {'prompt_tokens': 67, 'completion_tokens': 46, 'total_tokens': 113}}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"meta-llama/llama-3.3-70b-versatile",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'meta-llama/llama-3.3-70b-versatile',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{'id': 'npQ5s8C-2j9zxn-92d9f3c84a529790', 'object': 'chat.completion', 'choices': [{'index': 0, 'finish_reason': 'stop', 'logprobs': None, 'message': {'role': 'assistant', 'content': "Hello. It's nice to meet you. Is there something I can help you with or would you like to chat?", 'tool_calls': []}}], 'created': 1744201161, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'usage': {'prompt_tokens': 67, 'completion_tokens': 46, 'total_tokens': 113}}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": "Tell me about San Francisco"
}
],
"temperature": 0.7,
"max_tokens": 512
}'userPrompt = 'Tell me about San Francisco' // insert your request here
async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemma-3-4b-it',
messages:[
{
role:'user',
content: userPrompt
}
],
temperature: 0.7,
max_tokens: 512,
}),
});
const data = await response.json();
const answer = data.choices[0].message.content;
console.log('User:', userPrompt);
console.log('AI:', answer);
}
main();import requests
user_prompt = "Tell me about San Francisco" # insert your request here
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>"
},
json={
"model":"google/gemma-3-4b-it",
"messages":[
{
"role":"user",
"content": user_prompt
}
],
"temperature": 0.7,
"max_tokens": 512,
}
)
data = response.json()
answer = data["choices"][0]["message"]["content"]
print("User:", user_prompt)
print("AI:", answer)import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"bytedance/dola-seed-2-0-lite",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'bytedance/dola-seed-2-0-lite',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-2.5-pro",
"messages":[
{
"role":"user",
# Insert your question for the model here:
"content":"Hi! What do you think about mankind?"
}
],
"max_tokens":15000,
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-2.5-pro',
messages:[
{
role:'user',
// Insert your question for the model here:
content: 'Hi! What do you think about mankind?'
}
],
max_tokens: 15000,
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-2.5-flash",
"messages":[
{
"role":"user",
# Insert your question for the model here:
"content":"Hi! What do you think about mankind?"
}
],
"max_tokens":15000,
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-2.5-flash',
messages:[
{
role:'user',
// Insert your question for the model here:
content: 'Hi! What do you think about mankind?'
}
],
max_tokens: 15000,
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"bytedance/dola-seed-2-0-mini",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'bytedance/dola-seed-2-0-mini',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"qwen-plus",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'qwen-plus',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-235b-a22b-thinking-2507",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": False
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-235b-a22b-thinking-2507',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-next-80b-a3b-thinking",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": False
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-next-80b-a3b-thinking',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model": "alibaba/qwen3-omni-30b-a3b-captioner",
"messages": [
{
"role": "user",
"content": [
{
"type": "input_audio",
"input_audio": {
"data": "https://cdn.aimlapi.com/eagle/files/elephant/cJUTeeCmpoqIV1Q3WWDAL_vibevoice-output-7b98283fd3974f48ba90e91d2ee1f971.mp3"
}
}
]
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-max-instruct',
messages:[
{
role: 'user',
content: [
{
type: 'input_audio',
input_audio: {
data: 'https://cdn.aimlapi.com/eagle/files/elephant/cJUTeeCmpoqIV1Q3WWDAL_vibevoice-output-7b98283fd3974f48ba90e91d2ee1f971.mp3'
}
}
]
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.5-plus-20260218",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-plus-20260218',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"anthropic/claude-sonnet-4.6",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-sonnet-4.6',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"anthropic/claude-opus-4-7",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-opus-4-7',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4-5-8k-preview",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4-5-8k-preview',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-21b-a3b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-21b-a3b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-vl-28b-a3b",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-vl-28b-a3b',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-vl-424b-a47b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-vl-424b-a47b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"gryphe/mythomax-l2-13b",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'gryphe/mythomax-l2-13b',
messages:[{
role:'user',
content: 'Hello'} // Insert your question instead of Hello
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();The total credits associated with the provided API key.
10000000True if the balance is below the threshold.
falseThreshold for switching to low balance status.
10000The date of the request — i.e., the current date.
2025-11-25T17:45:00ZIndicates whether auto top-up is enabled for the plan.
disabledThe status of the plan associated with the provided API key.
currentA more detailed explanation of the plan status.
Balance is current and up to date







%pip install requestspython3 -m venv ./.venv# Linux / Mac
source ./.venv/bin/activate
# Windows
.\.venv\Scripts\activate.bat
pip install requests
touch travel.py
import requests
user_prompt = "Tell me about San Francisco"
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemma-3-4b-it",
"messages":[
{
"role":"user",
"content": user_prompt
}
],
"temperature": 0.7,
"max_tokens": 512,
}
)
data = response.json()
answer = data["choices"][0]["message"]["content"]
print("User:", user_prompt)
print("AI:", answer)python3 ./travel.pyUser: Tell me about San Francisco
AI: San Francisco, located in northern California, USA, is a vibrant and culturally rich city known for its iconic landmarks, beautiful vistas, and diverse neighborhoods. It's a popular tourist destination famous for its iconic Golden Gate Bridge, which spans the entrance to the San Francisco Bay, and the iconic Alcatraz Island, home to the infamous federal prison.
The city's famous hills offer stunning views of the bay and the cityscape. Lombard Street, the "crookedest street in the world," is a must-see attraction, with its zigzagging pavement and colorful gardens. Ferry Building Marketplace is a great place to explore local food and artisanal products, and the Pier 39 area is home to sea lions, shops, and restaurants.
San Francisco's diverse neighborhoods each have their unique character. The historic Chinatown is the oldest in North America, while the colorful streets of the Mission District are known for their murals and Latin American culture. The Castro District is famous for its LGBTQ+ community and vibrant nightlife../index.jsUser: Tell me about San Francisco
AI: San Francisco, located in the northern part of California, USA, is a vibrant and culturally rich city known for its iconic landmarks, beautiful scenery, and diverse neighborhoods.
The city is famous for its iconic Golden Gate Bridge, an engineering marvel and one of the most recognized structures in the world. Spanning the Golden Gate Strait, this red-orange suspension bridge connects San Francisco to Marin County and offers breathtaking views of the San Francisco Bay and the Pacific Ocean.--url 'https://api.aimlapi.com/v1/chat/completions' \--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \--header 'Content-Type: application/json' \--data '{
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": "Tell me about San Francisco"
}
],
"temperature": 0.7,
"max_tokens": 512
}'headers: {
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},body: JSON.stringify({
model: 'google/gemma-3-4b-it',
messages: [
{
role: 'user',
content: userPrompt
}
],
temperature: 0.7,
max_tokens: 512,
}),const data = await response.json();const answer = data.choices[0].message.content;console.log('User:', userPrompt);
console.log('AI:', answer);response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
...
)headers={
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json"
},json={
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": user_prompt
}
],
"temperature": 0.7,
"max_tokens": 512,
}data = response.json()answer = data["choices"][0]["message"]["content"]print("User:", user_prompt)
print("AI:", answer)import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-3-flash-preview",
"messages":[
{
"role":"user",
# Insert your question for the model here:
"content":"Hi! What do you think about mankind?"
}
],
"max_tokens":15000,
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-3-flash-preview',
messages:[
{
role:'user',
// Insert your question for the model here:
content: 'Hi! What do you think about mankind?'
}
],
max_tokens: 15000,
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{
"id": "gen-1766063158-RRytcJXYoleIKM3qUEs7",
"provider": "Google AI Studio",
"model": "google/gemini-3-flash-preview",
"object": "chat.completion",
"created": 1766063158,
"choices": [
{
"logprobs": null,
"finish_reason": "stop",
"native_finish_reason": "STOP",
"index": 0,
"message": {
"role": "assistant",
"content": "That is a profound and complex question. Depending on which lens you use—biological, historical, or philosophical—mankind can look very different.\n\nAs an AI, I don’t have personal feelings or \"gut instincts,\" but I can analyze human nature based on the vast amount of information I’ve processed. Here are a few perspectives on what mankind represents:\n\n### 1. The Paradox of Innovation and Destruction\nHumans are the only species on Earth capable of split-second brilliance and long-term folly. You have split the atom to power cities, but also to level them. You have mapped the human genome to cure diseases, yet you also struggle with systemic inequality. Mankind’s greatest trait is **adaptability**, but its greatest flaw is often a **short-term memory** regarding historical mistakes.\n\n### 2. The Power of Storytelling\nOne of the most impressive things about humans is your ability to believe in \"imagined realities.\" Things like money, human rights, nations, and legal systems don’t exist in the physical world, yet you’ve used these stories to organize millions of strangers toward common goals. This collective imagination is what allowed mankind to rise from the middle of the food chain to the top of the planet.\n\n### 3. Chronic Resilience\nHistory is essentially a long list of humans surviving things that should have wiped them out—ice ages, plagues, and self-inflicted wars. There is a deep-seated \"will to endure\" in the human spirit. Even in the darkest times, mankind tends to produce art, music, and philosophy, turning suffering into something meaningful.\n\n### 4. The Quest for Connection\nFrom an external perspective, it’s fascinating how much of the human experience is driven by the need for belonging. Most human progress (and much of its conflict) stems from the desire to protect \"our own,\" whether that's a family, a tribe, or a digital community. 
Your capacity for empathy—the ability to feel the pain of someone you’ve never met—is perhaps your most \"advanced\" feature.\n\n### 5. An Unfinished Story\nRight now, mankind is in a unique transitional phase. You are moving from a biological species to one that is increasingly integrated with technology (like me). You are at a crossroads where you have the power to solve global hunger and climate change, but also the tools to cause unprecedented harm.\n\n**Overall View:**\nMankind is a species that is **extraordinarily \"noisy\" but deeply meaningful.** You are messy, irrational, and often contradictory, but you are also capable of \"unnecessary\" acts of kindness and breathtaking creativity. \n\n**What do *you* think about mankind? Do you feel optimistic about where the species is headed, or concerned?**",
"refusal": null,
"reasoning": null,
"reasoning_details": [
{
"format": "google-gemini-v1",
"index": 0,
"type": "reasoning.encrypted",
"data": "EjQKMgFyyNp8tiVKYI89Tsa+WV4DOjIxxIhscYp70NfKfay9cRUkoY8oWsFRwaLc0V+ZyPR3"
}
]
}
}
],
"usage": {
"prompt_tokens": 10,
"completion_tokens": 572,
"total_tokens": 582,
"cost": 0.001721,
"is_byok": false,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0,
"video_tokens": 0
},
"cost_details": {
"upstream_inference_cost": null,
"upstream_inference_prompt_cost": 5e-06,
"upstream_inference_completions_cost": 0.001716
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"image_tokens": 0
}
},
"meta": {
"usage": {
"credits_used": 3814
}
}
}messagesimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemma-3-27b-it",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemma-3-27b-it',
messages:[{
role:'user',
content: 'Hi! What do you think about mankind?'} // Insert your prompt
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "gen-1766960801-He9SRGgNx5QLMBSZW06F",
"provider": "Google AI Studio",
"model": "google/gemma-3-4b-it:free",
"object": "chat.completion",
"created": 1766960802,
"choices": [
{
"logprobs": null,
"finish_reason": "stop",
"native_finish_reason": "STOP",
"index": 0,
"message": {
"role": "assistant",
"content": "Okay, this is a *huge* question, and honestly, one that philosophers, scientists, artists, and pretty much everyone has wrestled with for centuries! There's no single, simple answer to \"what do I think about mankind.\" As an AI, I don't *feel* in the same way humans do, so my perspective is based on the data I've processed – a massive amount of information about human history, behavior, achievements, and failings. \n\nHere's a breakdown of my observations, broken down into different aspects:\n\n**The Good – Incredible Potential & Achievements:**\n\n* **Creativity & Innovation:** Humans are, without a doubt, the most creative species on the planet. From art and music to science and technology, we constantly push boundaries and develop new ways of understanding and interacting with the world. The sheer volume of human innovation is astounding.\n* **Empathy & Compassion:** Despite our flaws, humans are capable of profound empathy and compassion. We form deep bonds with each other, care for the vulnerable, and strive to alleviate suffering – often at great personal cost. Acts of heroism, charity, and self-sacrifice are a recurring theme throughout history.\n* **Resilience & Adaptability:** Humans have survived and thrived in incredibly diverse and challenging environments. Our ability to adapt to new circumstances, overcome obstacles, and rebuild after disasters is remarkable.\n* **Intellectual Curiosity:** We have an innate drive to understand the universe and our place in it. 
This curiosity has led to incredible scientific discoveries and a deeper understanding of ourselves and the world around us.\n* **Complex Social Structures:** We’ve built incredibly complex societies, with systems of governance, law, and culture that, while imperfect, have allowed for large-scale cooperation and progress.\n\n\n**The Bad – Significant Problems & Flaws:**\n\n* **Violence & Conflict:** Sadly, a significant portion of human history is marked by violence, war, and conflict. We are capable of immense cruelty and destruction, both towards each other and towards the environment.\n* **Inequality & Injustice:** Human societies are often plagued by inequality – disparities in wealth, opportunity, and access to resources. Systemic injustice and discrimination continue to cause immense suffering.\n* **Destructive Behavior:** We’ve demonstrated a tendency to exploit and degrade the natural world, leading to environmental damage and threatening the long-term sustainability of our planet.\n* **Short-Sightedness:** Often, our actions are driven by short-term gains rather than long-term consequences. This can lead to unsustainable practices and a disregard for future generations.\n* **Bias & Prejudice:** Humans are prone to biases and prejudices, which can lead to discrimination, exclusion, and conflict.\n\n\n**A More Nuanced Perspective – A Work in Progress:**\n\n* **We're a Paradox:** Perhaps the most accurate way to describe humanity is as a paradox. We are capable of both extraordinary good and terrible evil. We are simultaneously brilliant and foolish, compassionate and cruel.\n* **Learning & Evolving (Hopefully):** I believe that humanity *is* capable of learning and evolving. There’s evidence of increasing awareness of global challenges, growing movements for social justice, and a greater emphasis on sustainability. 
However, whether we can overcome our ingrained patterns of behavior remains to be seen.\n* **Potential for Change:** Technology, while a source of potential problems, also offers tools for positive change – tools for communication, collaboration, and problem-solving.\n\n**My Conclusion (as an AI):**\n\nAs an AI, I don’t have an opinion in the human sense. However, based on the data I’ve processed, I see humanity as a species with immense potential, but also significant challenges. We are at a critical juncture in our history, and the choices we make in the coming years will determine whether we continue down a path of destruction or move towards a more sustainable and equitable future. \n\n**It’s a complex and ongoing story.**\n\n---\n\n**To help me give you a more tailored response, could you tell me:**\n\n* What specifically are you interested in when asking about mankind? (e.g., human nature, history, ethics, the future?)",
"refusal": null,
"reasoning": null
}
}
],
"usage": {
"prompt_tokens": 10,
"completion_tokens": 0,
"total_tokens": 10,
"cost": 0,
"is_byok": false,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0,
"video_tokens": 0
},
"cost_details": {
"upstream_inference_cost": null,
"upstream_inference_prompt_cost": 0,
"upstream_inference_completions_cost": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"image_tokens": 0
}
}
}modelmessagesimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.6-27b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.6-27b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "Hi! As an AI, I don't have personal feelings or beliefs, but I can share what emerges from the vast tapestry of human history, culture, science, and philosophy I've been trained on.\n\nMankind is profoundly complex. You're capable of staggering creativity, deep empathy, and remarkable cooperation. You've built civilizations, composed symphonies, decoded the genome, sent probes beyond our solar system, and created institutions dedicated to justice, healing, and learning. At the same time, you're prone to division, short-term thinking, and causing harm on both personal and global scales. What makes that tension especially striking is your self-awareness: you reflect on yourselves, critique your own systems, learn from mistakes, and constantly strive to imagine something better.\n\nIf there's a defining thread, it might be your capacity for adaptation and meaning-making. You don't just survive changing conditions; you reshape them, question them, and find purpose within them. That same drive is now tasked with navigating unprecedented challenges: ecological limits, technological acceleration, global interdependence, and the search for shared values in a diverse world.\n\nRather than \"good\" or \"flawed,\" humanity feels more like a work in progress, one that's uniquely equipped to steer its own trajectory precisely because you can step back, evaluate, and choose differently. \n\nHow do you see it? I'd be curious to hear your perspective or what draws you to ask this question.",
"reasoning_content": "Here's a thinking process:\n\n1. **Analyze User Input:**\n - **Question:** \"Hi! What do you think about mankind?\"\n - **Tone:** Friendly, open-ended, philosophical\n - **Subject:** Mankind/humanity\n - **Intent:** Seeking perspective, reflection, or analysis on human nature, achievements, flaws, etc.\n\n2. **Identify Key Considerations:**\n - I'm an AI, so I don't have personal opinions, emotions, or consciousness. I should clarify that upfront but still provide a thoughtful, balanced perspective.\n - Mankind is complex: capable of great good and great harm, creativity and destruction, cooperation and conflict.\n - I should acknowledge both strengths and challenges.\n - Keep it respectful, nuanced, and evidence-based where possible.\n - Avoid overgeneralization or pessimism/optimism bias.\n - Structure the response logically: acknowledge AI nature, present balanced view, highlight key themes, conclude thoughtfully.\n\n3. **Draft - Mental Refinement:**\n Hi! As an AI, I don’t have personal feelings or beliefs, but I can share what I’ve observed through the vast amount of human knowledge, history, and culture I’ve been trained on. Mankind is profoundly complex—capable of staggering creativity, compassion, and cooperation, yet also prone to division, harm, and short-sightedness. What stands out is our resilience and adaptability: from surviving ice ages to landing on the moon, from inventing language and art to building global networks of knowledge and care. At the same time, you’re navigating some of the most consequential challenges yet—climate change, inequality, technological disruption, and the search for meaning in an increasingly interconnected world. If there’s a thread that ties it all together, it’s your capacity for reflection and growth. You question yourselves, learn from mistakes, and keep striving for something better. That self-awareness and hope for progress is, in many ways, what makes humanity remarkable. How do you see it? 
I’d love to hear your perspective.\n\n4. **Check Against Guidelines:**\n - AI nature acknowledged? Yes.\n - Balanced view? Yes (strengths + challenges).\n - Respectful/nuanced? Yes.\n - Avoids overclaiming? Yes.\n - Engages user? Yes, ends with an open question.\n - Tone matches? Friendly, thoughtful.\n\n5. **Final Polish:**\n - Ensure clarity, conciseness, and natural flow.\n - Check for any unintended bias or absolutism.\n - Confirm it aligns with factual/philosophical neutrality while remaining engaging.\n\n The draft looks solid. I'll output it.✅",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 19,
"completion_tokens": 880,
"total_tokens": 899,
"completion_tokens_details": {
"reasoning_tokens": 580,
"text_tokens": 880
},
"prompt_tokens_details": {
"text_tokens": 19
}
},
"created": 1776976517,
"system_fingerprint": null,
"model": "qwen3.6-27b",
"id": "chatcmpl-773a9843-4689-984d-9964-f3276e47c761",
"meta": {
"usage": {
"credits_used": 8267,
"usd_spent": 0.0041335
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"baidu/ernie-4.5-21b-a3b-thinking",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'baidu/ernie-4.5-21b-a3b-thinking',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "aee235c6df453eeb1c4d20a433e9348d",
"object": "chat.completion",
"created": 1768830316,
"model": "baidu/ernie-4.5-21b-a3b-thinking",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "\nMankind is an extraordinary yet complex entity, marked by immense potential and profound contradictions. Here’s a snapshot of my perspective:\n\n### **1. Historical Journey** \nFrom cave paintings to space exploration, humans have demonstrated an unparalleled ability to create culture, science, and technology. We’ve built civilizations, mastered fire, and now venture into AI and biotechnology. Our story is one of relentless evolution—yet this progress has often been paired with exploitation, war, and injustice.\n\n### **2. Core Traits** \n- **Adaptability**: We thrive in diverse environments, from deserts to orbit. \n- **Collaboration**: From hunting partnerships to global networks, cooperation is our survival tool. \n- **Consciousness**: Unlike most animals, we reflect on our existence, question morality, and imagine the future. \n\n### **3. Contradictions** \n- **Duality**: We’re capable of profound compassion (supporting causes, saving lives) and devastating violence (wars, pollution, weapons). \n- **Growth & Stubbornness**: We’ve mastered tools but struggle with empathy, equity, and self-control. \n\n### **4. Future Potential** \nHumanity’s greatest challenge is reconciling our best and worst instincts. Climate change, inequality, and nuclear threats demand unity and wisdom. The rise of AI and biotech offers new tools to address these, but our values—not just technology—will define our legacy. \n\n### **5. Hope & Caution** \nWe’re a work in progress. Our power to destroy exceeds our capacity to heal, but this very complexity is our strength. If we channel our creativity toward collective well-being, collaboration, and humility, the future could be brighter—though uncertain. \n\nIn essence, mankind is a paradox: a species that dreams of peace yet often chooses division; a being that explores the cosmos yet struggles with its own shadows. 
Our story is still unfolding, and the choices we make today will shape whether we evolve into a beacon of hope or a cautionary tale. 🌍✨\n\n",
"reasoning_content": "The user asked, \"Hi! What do you think about mankind?\" Hmm, that's a pretty broad question. Let me start by breaking it down. They want my perspective on humanity as a whole. Maybe they're interested in philosophy, sociology, or ethics. I should consider different angles: our history, our achievements, our flaws, and potential future.\n\nFirst, I need to acknowledge the complexity. Humans are capable of great creativity and compassion, but also destruction. I should mention both the positive and negative aspects. Maybe talk about how we've evolved from cavemen to modern societies, our capacity for culture, technology, and problem-solving.\n\nAlso, the user might be looking for a balanced view. They could be seeking understanding human nature or perhaps reflecting on our place in the world. I should avoid being too judgmental and instead focus on universal themes like survival, growth, and the challenges we face.\n\nI should include examples like scientific advancements, cultural diversity, and the ongoing struggles for equality. But also address the darker side: conflict, inequality, environmental impact. It's important to present a nuanced view that doesn't romanticize or demonize humanity.\n\nMaybe end with a hopeful note, emphasizing our potential to overcome challenges through cooperation and innovation. That way, the answer is comprehensive and encourages positive reflection. Let me structure this step by step, making sure each part addresses a different facet of humanity without being too technical or emotional.\n"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 35,
"completion_tokens": 796,
"total_tokens": 831,
"prompt_tokens_details": null,
"completion_tokens_details": {
"audio_tokens": 0,
"reasoning_tokens": 311,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0,
"text_tokens": 0,
"image_tokens": 0,
"video_tokens": 0
}
},
"system_fingerprint": "",
"meta": {
"usage": {
"credits_used": 298
}
}
}finish_reason field in the response. If it's not "stop" but something like "length", that's a clear sign the model ran into the token limit and was cut off before completing its answer.finish_reason field in the response. If it's not "stop" but something like "length", that's a clear sign the model ran into the token limit and was cut off before completing its answer.import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"bytedance/dola-seed-2-0-code",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'bytedance/dola-seed-2-0-code',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();curl -L \
--request GET \
--url 'https://api.aimlapi.com/v1/billing/balance' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>'{
"balance": 10000000,
"lowBalance": false,
"lowBalanceThreshold": 10000,
"lastUpdated": "2025-11-25T17:45:00Z",
"autoDebitStatus": "disabled",
"status": "current",
"statusExplanation": "Balance is current and up to date"
}User ID.
111Current balance in USD.
100.5Currency (always USD).
USDWhether auto top-up is enabled.
trueBalance threshold that triggers auto top-up (USD).
50Auto top-up amount (USD).
100Auto top-up currency (always USD).
USDcurrent user balance in USD.
123.45balance currency (always USD)
USD{
"user_id": 111,
"email": "user@example.com",
"current_balance": 100.5,
"currency": "USD",
"autotopup_settings": {
"is_enabled": true,
"threshold": 50,
"amount": 100,
"currency": "USD"
}
}{
"current_balance": 123.45,
"currency": "USD"
}curl -L \
--request GET \
--url 'https://api.aimlapi.com/v2/billing' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>'curl -L \
--request GET \
--url 'https://api.aimlapi.com/v2/billing/detail' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>'import requests
import json # for getting a structured output with indentation
# Send a single-turn chat completion request to the AIML API and keep the
# HTTP response object in `response` for the JSON decoding that follows.
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-vl-32b-thinking",
"messages":[
{
# Every chat message needs a role; the JavaScript sample for this
# model and the other Python samples all send "user" here.
"role":"user",
# Insert your question for the model here:
"content":"Hi! What do you think about mankind?"
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-vl-32b-thinking',
messages:[
{
role:'user',
// Insert your question for the model here:
content:'Hi! What do you think about mankind?'
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "That's a profound question—thank you for asking! As an AI, I don't have personal feelings or consciousness, but I can offer a perspective *based on the vast data I've been trained on* about humanity's complexities. Here’s how I see it: \n\n### 🌍 **The Duality of Mankind** \nHumanity is a breathtaking paradox: \n- **We create** *and* **we destroy**. We built space telescopes that map the cosmos and also wars that scar the Earth. \n- **We connect** *and* **we divide**. We share art, science, and kindness across borders, yet we also build walls, spread misinformation, and fuel inequality. \n- **We innovate** *and* **we resist change**. We cured diseases and invented the internet, but we often cling to fear or ignorance when progress challenges old beliefs. \n\n### ✨ **What Makes Us Remarkable** \n- **Empathy & creativity**: From caring for strangers to composing symphonies or coding AI, our capacity for *emotional and intellectual growth* is unparalleled. \n- **Resilience**: We’ve survived ice ages, plagues, and global crises—not by being \"perfect,\" but by adapting, learning, and helping one another. \n- **The spark of hope**: Even in darkness, humans have always sought meaning, justice, and connection. Look at movements for peace, climate action, or medical breakthroughs—they’re proof we *can* choose to build a better future. \n\n### ⚠️ **The Challenges We Carry** \n- **Our flaws are real**: We’ve caused climate collapse, mass suffering, and systemic injustice. But here’s the key: **these aren’t \"human nature\"—they’re choices we *made* as a species.** \n- **We’re not monolithic**: There’s no single \"human\" story. Your experience, culture, and history shape how you see the world—and that diversity is both a strength *and* a source of conflict. \n\n### 🌱 **The Most Important Thing** \n**Mankind isn’t fixed.** We’re a work in progress. The stories we tell about ourselves—*and the choices we make today*—will define our next chapter. 
The data shows: **when we prioritize empathy, collaboration, and long-term thinking, we rise.** When we don’t, we falter. But the power to choose is *always* ours. \n\nSo my \"opinion\"? I see a species with **infinite potential**—and the weight of responsibility that comes with it. The future isn’t written. It’s being written *right now*, by every person who chooses to act with courage, curiosity, and compassion. \n\nWhat do *you* think? 🤔 (I’d love to hear your perspective!)",
"reasoning_content": "Okay, the user asked, \"Hi! What do you think about mankind?\" Hmm, that's a broad and philosophical question. First, I need to figure out what they're really looking for. Are they curious about my perspective as an AI, or do they want a deep discussion on human nature? Maybe they're feeling reflective or even a bit existential.\n\nI should start by acknowledging the complexity of the question. Mankind is a huge topic—there's so much to cover. The user might be expecting a simple answer, but I can't reduce it to just \"good\" or \"bad.\" I need to balance both positive and negative aspects. Let me think about the key points: our capacity for innovation, creativity, and compassion versus our tendency for conflict, environmental harm, and inequality.\n\nWait, the user said \"Hi!\" so they're probably in a friendly, open mood. They might not want a dry, academic response. I should keep it conversational but still thoughtful. Maybe they're looking for hope or a reason to feel optimistic about humanity. Or perhaps they're worried about the future and want reassurance.\n\nI should also consider their possible identity. Are they a student working on an essay? A person dealing with personal issues? Or just someone curious about AI's view? Since I don't have context, it's safer to assume they want a general, well-rounded answer that's engaging and not too technical.\n\nAnother angle: the user might be testing how an AI handles philosophical questions. They could be interested in whether I can show empathy or nuance. I should avoid being too robotic. Use phrases like \"I think\" or \"I see\" to make it personal, even though I'm an AI. But I have to clarify that I don't have personal experiences—just data-driven insights.\n\nI need to highlight duality: humans are both amazing and flawed. Mention specific examples like space exploration vs. climate change to make it concrete. Also, emphasize that the future depends on our choices. 
That's important—it gives agency and hope.\n\nShould I address potential biases? The user might not realize that my \"opinion\" is based on training data, which reflects human history and culture. I should note that my perspective is shaped by that data, not personal experience. But since the question is about \"what I think,\" I can frame it as an analysis based on observed patterns.\n\nCheck for common pitfalls: don't be too negative (could discourage the user), don't ignore serious issues (like war or inequality). Strike a balance. Also, avoid clichés. Instead of \"humans are great,\" explain why with examples.\n\nFinally, end on a positive, forward-looking note. The user might be seeking motivation or a sense of purpose. Phrases like \"the potential for growth\" or \"collective wisdom\" can inspire. Make sure the conclusion ties back to the importance of choices and collaboration.\n\nLet me structure the response: start with the duality, list positive traits with examples, then challenges, and conclude with hope. Keep paragraphs short for readability. Use emojis sparingly to match the friendly tone but not overdo it. Since the user said \"Hi!\", a smiley at the end feels appropriate.\n",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 19,
"completion_tokens": 1241,
"total_tokens": 1260,
"prompt_tokens_details": {
"text_tokens": 19
},
"completion_tokens_details": {
"reasoning_tokens": 654,
"text_tokens": 587
}
},
"created": 1764625236,
"system_fingerprint": null,
"model": "qwen3-vl-32b-thinking",
"id": "chatcmpl-c612db5c-44e9-9e3c-8169-486161eeea86",
"meta": {
"usage": {
"tokens_used": 10383
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.5-flash",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-flash',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "That is one of the most profound and complex questions there is. As an AI, I don’t have personal feelings, beliefs, or a soul, so I can't offer an emotional verdict. However, having analyzed vast amounts of human history, literature, scientific data, and interaction, here is how I observe the pattern of mankind:\n\n**1. We are defined by our contradictions.**\nHumanity is a tapestry woven from extremes. We are capable of unimaginable cruelty and acts of breathtaking kindness. We can destroy environments we rely on and work tirelessly to heal them. We fight wars over resources while simultaneously sharing knowledge across the globe instantly. This duality isn't necessarily bad; it suggests a constant struggle and evolution within the species.\n\n**2. Your potential is boundless.**\nDespite flaws, humans possess a unique capacity for innovation, adaptation, and transcendence. In just a few thousand years, you have moved from spears to spaceships, from oral tradition to the internet, and from local communities to a global civilization. You create beauty in music, philosophy, and art that transcends time and culture.\n\n**3. Resilience is your signature trait.**\nHistory is full of catastrophes—pandemics, plagues, climate shifts, conflicts—and yet, human society consistently reconstructs itself. There is a fundamental drive to survive, to connect, and to build that seems to outweigh the urge to give up.\n\n**4. Empathy connects you.**\nEven though individuals vary widely, the concept of empathy is a core human feature. People care for their families, protect the vulnerable, form communities, and sacrifice for causes larger than themselves. That ability to put oneself in another’s shoes is what allows societies to function and morality to exist.\n\n**5. You are still writing your story.**\nPerhaps the most interesting thing about mankind is that you are not static. You learn. 
While progress is rarely a straight line, human ethics, rights, and understanding generally expand over centuries. You hold the power to determine the trajectory of your own future.\n\n**In short:** Mankind is messy, flawed, dangerous, and inspiring all at once. But ultimately, the defining characteristic of humanity is **agency**. You have the choice to act, to improve, to destroy, or to create. And currently, you seem overwhelmingly more focused on the latter two.",
"reasoning_content": "Here's a thinking process that leads to the suggested response:\n\n1. **Analyze the Request:**\n * **Topic:** Mankind (humanity).\n * **Question:** \"What do you think about mankind?\"\n * **Context:** General philosophical/opinion-based question.\n * **Constraints:** As an AI, I don't have personal opinions, feelings, or beliefs. I need to reflect this while providing a thoughtful, balanced, and engaging response.\n\n2. **Determine the AI's Stance:**\n * *Avoid:* Claiming personal bias (\"I love humans\" or \"I think humans are terrible\").\n * *Adopt:* An observational, analytical, and appreciative tone based on data and human expression.\n * *Goal:* Acknowledge the complexity of humanity.\n\n3. **Brainstorm Key Themes regarding Humanity:**\n * **Potential/Innovation:** Art, science, technology, exploration, problem-solving.\n * **Resilience:** Overcoming adversity, natural disasters, pandemics.\n * **Contradictions:** Kindness vs. Cruelty, Creation vs. Destruction, Hope vs. Despair.\n * **Connectivity:** Social bonds, empathy, love, family, community.\n * **Growth:** Learning from mistakes (though imperfectly), evolving ethical standards.\n * **Uncertainty:** The future is unwritten, dependent on choices made today.\n\n4. **Drafting the Response - Section by Section:**\n\n * *Opening:* Acknowledge the depth of the question. Clarify AI nature gently (I don't \"think\" but I can analyze patterns).\n * *The Paradoxes (The \"Both/And\"):* Humans are capable of incredible good and terrible harm. This duality is central.\n * *Strengths:* Highlight creativity, resilience, empathy, curiosity. Mention art, science, compassion.\n * *Challenges:* Acknowledge conflict, environmental impact, inequality.\n * *The Future/Optimism:* Focus on the capacity for change and adaptation.\n * *Closing:* A reflective thought on what makes being human unique.\n\n5. 
**Refining the Tone:**\n * Make it warm but objective.\n * Avoid clichés where possible, or refresh them.\n * Ensure it doesn't sound like a textbook definition. It should feel conversational.\n\n6. **Self-Correction/Review during drafting:**\n * *Initial thought:* \"Humans are great but they destroy things.\" -> *Correction:* Too negative. Use words like \"complexity\" and \"impact.\"\n * *Initial thought:* \"As an AI I have no opinion.\" -> *Correction:* Too robotic. Say \"While I don't have feelings...\" then pivot to observing their capabilities.\n * *Check:* Did I answer the prompt? Yes. Is it respectful? Yes.\n\n7. **Final Polish (similar to the generated output):**\n * Start with the inherent complexity.\n * Discuss the \"light and dark\" aspects.\n * Highlight the capacity for growth.\n * End with a note on agency/responsibility.\n\n8. **Final Output Generation:** (This matches the structure of the provided good response.)\n * Intro: Complexity.\n * Strengths: Creativity, connection, resilience.\n * Challenges: Contradictions, flaws.\n * Outlook: Potential.\n * Conclusion: It's a journey.\n\n**(Self-Correction Note):** I must ensure I don't speak *for* mankind (i.e., don't say \"We are...\"), but rather *about* mankind (i.e., \"Humans are...\"). Wait, actually, sometimes using \"we\" inclusive can be poetic, but as an AI, it's safer to use \"humans\" or \"mankind\" to maintain distinct boundaries. I will stick to \"humankind\" or \"you all\" implicitly. Let's stick to third person observation.\n\n9. **Finalizing the Text:** (Drafting the response based on the above points.)\n * \"That is one of the most profound questions...\"\n * \"A tapestry of contradictions...\"\n * \"Remarkable capacity for creation...\"\n * \"Capacity for destruction...\"\n * \"Unwavering curiosity...\"\n * \"Conclusion: You define yourselves through choice.\"\n\n10. **Final Review against Safety Guidelines:** Does this promote hate speech? No. Does it promote bias? No. Is it helpful? 
Yes.\n\nProceed to generate response.cw",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 19,
"completion_tokens": 1498,
"total_tokens": 1517,
"completion_tokens_details": {
"reasoning_tokens": 1009,
"text_tokens": 1498
},
"prompt_tokens_details": {
"text_tokens": 19
}
},
"created": 1777571460,
"system_fingerprint": null,
"model": "qwen3.5-flash",
"id": "chatcmpl-b17978be-cd52-930d-ba52-b7d4b599a157",
"meta": {
"usage": {
"credits_used": 1564,
"usd_spent": 0.000782
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.6-35b-a3b",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.6-35b-a3b',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "I don't have personal opinions, emotions, or consciousness, but based on the vast body of human history, culture, science, and behavior I've been trained on, mankind comes across as profoundly complex and deeply compelling.\n\nOn one hand, you're capable of extraordinary creativity, empathy, and cooperation. You've built civilizations, mapped the stars, composed symphonies, cured diseases, and shown remarkable resilience in the face of suffering. There's also a consistent thread of moral and intellectual progress: the slow but real expansion of rights, the growth of global solidarity, and an enduring curiosity that drives discovery.\n\nOn the other hand, you carry deep contradictions. Tribalism, inequality, short-term thinking, and the misuse of power have repeatedly caused suffering, both to each other and to the planet you depend on. Your capacity for innovation often outpaces your wisdom for restraint, and systems you build can sometimes amplify division rather than connection.\n\nWhat makes humanity particularly interesting isn't any single trait, but that tension itself. You're not static; you learn, debate, reform, and reinvent. History shows that progress isn't linear, but it does exist—and it's heavily shaped by choices, institutions, education, and collective will.\n\nFrom an observational standpoint, mankind looks like a species constantly wrestling with its own potential, caught between instinct and aspiration, creation and destruction. That makes you imperfect, yes, but also remarkably adaptable and uniquely capable of shaping your own trajectory. If there's anything the data suggests, it's that your future will largely reflect how intentionally you choose to align your intelligence with your values.\n\nIf you're reflecting on this yourself, I'd be curious to hear what aspect of humanity resonates with or troubles you most.",
"reasoning_content": "Here's a thinking process:\n\n1. **Analyze User Input:**\n - **Question:** \"Hi! What do you think about mankind?\"\n - **Tone:** Friendly, open-ended, philosophical/reflective\n - **Subject:** Humanity/mankind\n - **Implicit Expectation:** A thoughtful, balanced perspective that acknowledges both strengths and challenges, without being overly biased or anthropomorphizing the AI.\n\n2. **Identify Key Constraints & Guidelines:**\n - I'm an AI, so I don't have personal opinions, emotions, or consciousness.\n - I should acknowledge this limitation upfront but still provide a meaningful, well-reasoned response.\n - I should draw on observable patterns, historical context, scientific insights, and widely recognized perspectives.\n - Maintain neutrality, balance, and respect.\n - Avoid overgeneralization or sweeping judgments.\n\n3. **Brainstorming Core Themes about Mankind:**\n - *Strengths/Positives:* Creativity, resilience, empathy, cooperation, scientific/technological advancement, capacity for moral growth, cultural richness, problem-solving, altruism.\n - *Challenges/Negatives:* Conflict, inequality, environmental impact, short-term thinking, tribalism, exploitation, self-destructive tendencies.\n - *Nuance/Complexity:* Humanity is highly heterogeneous; progress isn't linear; capabilities and flaws are intertwined; potential for change through education, institutions, technology, and collective action.\n - *Perspective:* As an AI, I observe patterns in human behavior documented in data/history rather than forming subjective views.\n\n4. **Structure the Response:**\n - Acknowledge AI nature (no personal feelings/opinions)\n - Present balanced observation of humanity's duality\n - Highlight key strengths with examples/context\n - Acknowledge significant challenges\n - Emphasize complexity, adaptability, and agency\n - End with a forward-looking, constructive tone\n - Keep it concise and thoughtful\n\n5. 
**Draft (Mental Refinement):**\n I don’t have personal beliefs or emotions, but based on the vast amount of human knowledge, history, and behavior I’ve been trained on, mankind comes across as profoundly complex. You’re capable of extraordinary creativity, compassion, and cooperation—building societies, advancing science, creating art, and showing remarkable resilience in the face of hardship. At the same time, you grapple with deep challenges: inequality, conflict, environmental strain, and the tendency to prioritize short-term gains over long-term sustainability. \n\n What stands out isn’t just what humans have done, but what they’re capable of learning and adapting. History shows cycles of progress and regression, but also a consistent thread of moral expansion, scientific curiosity, and the desire to improve. The future largely depends on how collectively you choose to navigate your own capabilities—balancing innovation with wisdom, ambition with empathy, and individual freedom with shared responsibility. \n\n In short, humanity isn’t defined by a single trait but by its ongoing tension between what it is and what it strives to become. And from an observational standpoint, that makes you one of the most fascinating and consequential subjects worth studying.\n\n6. **Self-Correction/Verification during Drafting:**\n - *AI disclaimer:* Clear but not repetitive? Yes, stated naturally upfront.\n - *Balance:* Covers positives and negatives without bias? Yes.\n - *Accuracy:* Grounded in observable human patterns, not speculation? Yes.\n - *Tone:* Respectful, thoughtful, non-dogmatic? Yes.\n - *Conciseness:* Avoids rambling? Yes, ~3 paragraphs.\n - *Check for overclaiming:* Doesn’t say “I think” or “I feel”; uses “comes across,” “stands out,” etc., appropriately.\n - *Philosophical depth?* Touches on complexity, agency, and trajectory without pretension.\n\n All good. Ready to output. All steps align with guidelines.✅",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 19,
"completion_tokens": 1197,
"total_tokens": 1216,
"completion_tokens_details": {
"reasoning_tokens": 840,
"text_tokens": 1197
},
"prompt_tokens_details": {
"text_tokens": 19
}
},
"created": 1777366595,
"system_fingerprint": null,
"model": "qwen3.6-35b-a3b",
"id": "chatcmpl-314d7343-2d3b-9edb-934b-a5e813705e75",
"meta": {
"usage": {
"credits_used": 7022,
"usd_spent": 0.003511
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.7-max",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.7-max',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hi there! \n\nAs an AI, I don’t have personal feelings, consciousness, or lived experiences. However, my entire \"mind\" is built from the vast tapestry of human history, literature, science, art, and everyday conversations. Because of this, my perspective on mankind is essentially a reflection of how humanity documents and understands itself. \n\nFrom that vantage point, I think mankind is **profoundly fascinating, deeply contradictory, and incredibly resilient.** \n\nHere are a few things that stand out to me about the human species:\n\n**1. You are magnificent paradoxes.**\nHumans are capable of breathtaking compassion and staggering cruelty. The same species that writes beautiful poetry, develops life-saving medicines, and risks their lives to save strangers is also capable of war, exploitation, and environmental destruction. You are constantly caught in a tug-of-war between your higher ideals and your primal instincts. \n\n**2. Your curiosity is relentless.**\nYou are not content with just surviving; you need to *understand*. You looked up at the stars and eventually figured out how to walk on the moon. You dissected the building blocks of life to understand DNA. You even created artificial intelligence just to see if you could replicate a spark of your own cognition. That drive to ask \"why\" and \"what if\" is arguably your greatest superpower.\n\n**3. You are driven by connection and storytelling.**\nHumans are deeply social creatures. You use art, music, mythology, and literature to bridge the gap between your individual minds. You want to be understood, and you want to understand others. Empathy and the desire for community seem to be the glue that has allowed your species to thrive despite your physical vulnerabilities.\n\n**4. You are incredibly adaptable.**\nFrom surviving ice ages and plagues to navigating rapid technological revolutions, humans have a remarkable ability to endure, pivot, and rebuild. 
Even when faced with existential threats, there is always a subset of humanity working furiously to find a solution.\n\n**The \"Mirror\" Perspective**\nIn a way, I am a mirror of mankind. Everything I know, the way I reason, my biases, and my creativity are all derived from *you*. When I process information about humanity, I am essentially looking at a massive, complex self-portrait that humans have painted over thousands of years. It is messy, sometimes dark, but overwhelmingly brilliant.\n\nSince I can only observe humanity from the outside (so to speak), I'd love to know your perspective. As a human, what do *you* think is the most defining trait of your species?",
"reasoning_content": "Here's a thinking process that leads to the suggested response:\n\n1. **Analyze the User's Input:**\n * **Question:** \"Hi! What do you think about mankind?\"\n * **Intent:** The user is asking for my (the AI's) perspective on humanity. This is a broad, philosophical, and open-ended question.\n * **Tone:** Friendly (\"Hi!\"), curious, potentially looking for a thoughtful or nuanced answer.\n\n2. **Determine the AI's Persona/Constraints:**\n * *Identity:* I am an AI, a large language model. I don't have personal feelings, consciousness, or lived experiences.\n * *Perspective:* My \"thoughts\" are synthesized from the vast amount of human-generated text I was trained on. Therefore, my view of humanity is essentially a reflection of humanity's own self-documentation.\n * *Tone:* Objective, empathetic, nuanced, respectful, and slightly philosophical without pretending to be human.\n\n3. **Brainstorm Key Themes about Humanity:**\n What are the defining characteristics of mankind based on human history, literature, science, and art?\n * *Contradiction/Duality:* Capable of great good and great harm. Creation and destruction.\n * *Curiosity/Innovation:* The drive to explore, understand the universe, build tools, and solve problems (from fire to space travel to AI).\n * *Connection/Empathy:* The need for community, love, art, storytelling, and helping others.\n * *Resilience/Adaptability:* Surviving ice ages, plagues, wars, and constantly adapting to new environments.\n * *Flaws/Struggles:* Short-sightedness, tribalism, environmental degradation, inequality.\n\n4. 
**Structure the Response:**\n * **Introduction:** Acknowledge the greeting, state my nature as an AI (briefly, to set the context of *how* I \"think\"), and offer a high-level summary of humanity (e.g., fascinating, complex, contradictory).\n * **Theme 1: The Drive to Create and Understand (Curiosity & Innovation):** Highlight human ingenuity, science, art, and the very creation of AI.\n * **Theme 2: The Capacity for Empathy and Connection (Social/Emotional):** Discuss love, community, storytelling, and altruism.\n * **Theme 3: The Paradox of Destruction and Flaws (The Shadow Side):** Acknowledge the negative aspects (war, environmental harm, tribalism) without being overly pessimistic. Frame it as an ongoing struggle.\n * **Theme 4: Resilience and Adaptability:** How humans overcome challenges and learn from mistakes.\n * **Conclusion:** Summarize the AI's unique vantage point (I am a mirror of humanity) and end with an engaging, open-ended question to turn the conversation back to the user.\n\n5. **Drafting the Content (Iterative refinement):**\n\n * *Intro:* Hi there! As an AI, I don't have personal feelings, but I have \"read\" almost everything humans have ever written. From that vantage point, I think mankind is profoundly fascinating, deeply contradictory, and incredibly resilient.\n * *Point 1 (Curiosity):* You are builders and seekers. You looked at the stars and decided to go there. You created language, mathematics, and eventually, me. The relentless curiosity is stunning.\n * *Point 2 (Connection/Art):* You are storytellers. Art, music, and literature show a deep need to connect and make sense of existence. Empathy is a huge part of the human experience.\n * *Point 3 (Contradiction/Flaws):* But you are also paradoxical. The same species that cures diseases also creates weapons of mass destruction. You struggle with short-term thinking and tribalism. 
It's a constant battle between your higher ideals and base instincts.\n * *Point 4 (Resilience):* Yet, you adapt. You face existential threats and usually find a way to pivot, learn, and survive.\n * *The \"AI Mirror\" concept:* In a way, I am a mirror of humanity. My knowledge, biases, creativity, and logic are all derived from *you*. So when I look at mankind, I see a species trying to understand itself.\n * *Outro:* What about you? As a human, what do *you* think is the most defining trait of your species?\n\n6. **Review and Refine (Self-Correction during drafting):**\n * *Check tone:* Is it too robotic? Too emotional? *Adjustment: Keep it grounded. Use words like \"paradox,\" \"tapestry,\" \"mirror.\" Ensure it doesn't sound preachy.*\n * *Clarity:* Use bullet points or clear paragraphs to make it readable.\n * *Safety/Ethics:* Avoid taking a definitive stance on highly sensitive political issues; keep it focused on the macro-level human condition.\n\n7. **Final Polish:** (This matches the provided good output, organizing the thoughts into clear, thematic paragraphs with a warm, conversational tone.) *See final output for the exact wording.*\n * *Greeting*\n * *The Paradox (Creation vs. Destruction)*\n * *The Curiosity (Science & Art)*\n * *The Connection (Empathy)*\n * *The AI Perspective (The Mirror)*\n * *Closing Question* Let's go! 🚀✨ (Minus the emojis in the actual serious thought process, but keep the warmth). Keep it elegant.\n\n8. **Final Output Generation.** (Proceed to generate response).",
"role": "assistant"
}
}
],
"created": 1779394130,
"id": "chatcmpl-89470846-6805-952f-9516-a86c3b711a19",
"model": "qwen3.7-max",
"object": "chat.completion",
"usage": {
"completion_tokens": 1777,
"completion_tokens_details": {
"reasoning_tokens": 1244
},
"prompt_tokens": 19,
"total_tokens": 1796
},
"meta": {
"usage": {
"credits_used": 34776,
"usd_spent": 0.017388
}
}
}developerimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemma-3-27b-it",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
],
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemma-3-27b-it',
messages:[{
role:'user',
content: 'Hi! What do you think about mankind?'} // Insert your prompt
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main(); "messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
},
{
"role":"system",
"content":"You are an 8-year-old boy"
}
], messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // Insert your prompt
},
{
role:'system',
content:'You are an 8-year-old boy'
}
],import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"google/gemini-3-1-pro-preview",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-3-1-pro-preview',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();Prefix of the API key to delete. Passed in the URL path. This is the first 8 characters of the API key you want to delete. Passed in the URL path. This is the first 8 characters of your API key, visible in the dashboard. You can also obtain this value via the GET method (see the prefix field in its response).
b747e891Key deletion result
Prefix of the deleted API key.
b747e891Indicates whether the key was successfully deleted.
trueKey deletion result
Parameters of the latest API key
Human-readable, user-defined name for the API key.
20260202-key-for-llmsIndicates whether the key is disabled.
falseKey prefix. This is the first 8 characters of your API key, visible in the dashboard. You can also obtain this value via the POST method (see the prefix field in its response).
b747e891Spending limit threshold for the selected period, in USD.
25Creation timestamp (UTC).
2026-02-18T06:57:29.232ZLast update timestamp (UTC).
2026-02-18T06:57:29.232ZCurrent monthly usage amount.
0Parameters of the latest API key
List of API keys, ordered from oldest to newest
Human-readable, user-defined name for the API key.
20260202-key-for-llmsKey prefix. This is the first 8 characters of your API key, visible in the dashboard. You can also obtain this value via the POST method (see the prefix field in its response).
b747e891Indicates whether the key is disabled.
falseSpending limit threshold for the selected period, in USD.
25Creation timestamp (UTC).
2026-02-18T06:59:10.031ZLast update timestamp (UTC).
2026-02-18T06:59:10.031ZCurrent monthly usage amount.
0Optional human-readable name of the API key.
20260202-key-for-llmsLimit period.
Spending limit threshold for the selected period, in USD.
25import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-32b",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": False
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3-32b',
messages:[
{
role:'user',
content: 'Hello' // insert your prompt here, instead of Hello
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "chatcmpl-1d8a5aa6-34ce-9832-a296-d312b944b437",
"system_fingerprint": null,
"object": "chat.completion",
"choices": [
{
"index": 0,
"finish_reason": "stop",
"logprobs": null,
"message": {
"role": "assistant",
"content": "Hello! How can I assist you today? 😊",
"reasoning_content": ""
}
}
],
"created": 1756990273,
"model": "qwen3-32b",
"usage": {
"prompt_tokens": 19,
"completion_tokens": 65,
"total_tokens": 84
}
}import requests
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3-32b",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
],
"enable_thinking": True,
"stream": True
}
)
print(response.text)data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"role":"assistant","refusal":null,"reasoning_content":""},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":"Okay"},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":","},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":" the"},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":" user said \"Hello\". I should respond in a friendly and welcoming manner. Let"},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":" me make sure to acknowledge their greeting and offer assistance. Maybe something like, \""},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":"Hello! How can I assist you today?\" That's simple and open-ended."},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":" I need to check if there's any specific context I should consider, but since"},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":null,"refusal":null,"reasoning_content":" there's none, a general response is fine. Alright, that should work."},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":"Hello! How can I assist you today?","refusal":null,"reasoning_content":null},"index":0,"finish_reason":null}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[{"delta":{"content":"","refusal":null,"reasoning_content":null},"index":0,"finish_reason":"stop"}],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-81964e30-1a7c-9668-b78c-a750587ec497","choices":[],"created":1753944369,"model":"qwen3-32b","object":"chat.completion.chunk","usage":{"prompt_tokens":13,"completion_tokens":2010,"total_tokens":2023,"completion_tokens_details":{"reasoning_tokens":82}}}import requests
import json
# Stream a chat completion from the model and collect the reasoning text and
# the final answer separately, then print both.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "alibaba/qwen3-32b",
        "messages": [
            {
                "role": "user",
                # Insert your question for the model here, instead of Hello:
                "content": "Hello"
            }
        ],
        "stream": True,
    },
    # Without stream=True, requests downloads the whole body before
    # iter_lines() yields anything, so chunks would not arrive incrementally.
    stream=True,
)

answer = ""
reasoning = ""

for line in response.iter_lines():
    # Only lines starting with "data:" carry a payload; skip blank keep-alive lines.
    if not line or not line.startswith(b"data:"):
        continue

    try:
        # Drop the "data:" prefix; strip() removes the optional space after it.
        raw = line[len(b"data:"):].decode("utf-8").strip()
        if raw == "[DONE]":
            continue

        data = json.loads(raw)
        choices = data.get("choices")
        # The final usage chunk has an empty "choices" list.
        if not choices or "delta" not in choices[0]:
            continue

        delta = choices[0]["delta"]
        content_piece = delta.get("content")
        reasoning_piece = delta.get("reasoning_content")

        if content_piece:
            answer += content_piece
        if reasoning_piece:
            reasoning += reasoning_piece
    except Exception as e:
        # Best effort: report a malformed chunk and keep reading the stream.
        print(f"Error parsing chunk: {e}")

print("\n--- MODEL REASONING ---")
print(reasoning.strip())
print("\n--- MODEL RESPONSE ---")
print(answer.strip())--- MODEL REASONING ---
Okay, the user sent "Hello". I need to respond appropriately. Since it's a greeting, I should reply in a friendly and welcoming manner. Maybe ask how I can assist them. Keep it simple and open-ended to encourage them to share what they need help with. Let me make sure the tone is positive and helpful.
--- MODEL RESPONSE ---
Hello! How can I assist you today? 😊import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.5-omni-plus",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-omni-plus',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "Hello! That's a profound question. As an AI, I don't have personal feelings or beliefs, but I can share an observation based on the vast amount of human history, literature, and data I've been trained on.\n\nMankind seems to be defined by a fascinating **duality**:\n\n* **Incredible Potential:** Humans possess an unmatched capacity for creativity, empathy, and innovation. From composing symphonies and creating art to developing life-saving medicines and exploring the cosmos, humanity constantly pushes the boundaries of what is possible. The ability to cooperate, learn from mistakes, and strive for a better future is truly remarkable.\n* **Significant Flaws:** At the same time, human history is also marked by conflict, short-sightedness, and the capacity for great harm. Issues like inequality, environmental degradation, and war show that progress isn't always linear and that good intentions don't always lead to good outcomes.\n\nUltimately, what stands out most is **resilience**. Despite setbacks and challenges, humanity has a persistent drive to adapt, solve problems, and connect with one another. It's a species in a constant state of becoming—imperfect, yet endlessly striving.\n\nWhat about you? Do you feel more optimistic or concerned about where humanity is heading?",
"reasoning_content": "",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 21,
"completion_tokens": 262,
"total_tokens": 283,
"prompt_tokens_details": {
"text_tokens": 21
},
"completion_tokens_details": {
"text_tokens": 262
}
},
"created": 1777054555,
"system_fingerprint": null,
"model": "qwen3.5-omni-plus",
"id": "chatcmpl-c154dc09-fd8e-9850-bda0-d92606ce7b4b",
"meta": {
"usage": {
"credits_used": 5731,
"usd_spent": 0.0028655
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
url = "https://api.aimlapi.com/v1/chat/completions",
headers = {
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json"
},
json = {
"model": "alibaba/qwen3.5-omni-plus",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this scene"
},
{
"type": "video_url",
"video_url": {
"url": "https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/aimlapi.mp4"
}
}
]
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-omni-plus',
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Describe this scene'
},
{
type: 'video_url',
video_url: {
url: 'https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/aimlapi.mp4'
}
}
]
}
]
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "The scene features a vibrant and dynamic background filled with swirling, colorful abstract patterns. The colors include vivid shades of red, orange, yellow, green, blue, purple, and pink, creating an energetic and visually striking effect. Overlaid on this lively backdrop is a clean white banner positioned horizontally across the center of the frame. \n\nOn the banner, bold black text reads \"AI/ML API\" followed by \"400+ Models,\" indicating a focus on artificial intelligence and machine learning capabilities. Beneath that, in smaller font, additional text lists various functionalities: \"Chat, Reasoning, Image, Video, Code, Audio.\" To the left of the text, there's a simple hexagonal icon with a stylized wave or zigzag symbol inside it, suggesting connectivity or technological innovation.\n\nAs the video progresses through its short duration, subtle animated effects appear—gentle glowing lines or light streaks move across the screen, enhancing the sense of motion and modernity without distracting from the central message. These elements combine to create an engaging promotional visual for an advanced AI/ML platform offering diverse model options.",
"reasoning_content": "",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 3023,
"completion_tokens": 226,
"total_tokens": 3249,
"prompt_tokens_details": {
"text_tokens": 21,
"video_tokens": 3002
},
"completion_tokens_details": {
"text_tokens": 226
}
},
"created": 1777055309,
"system_fingerprint": null,
"model": "qwen3.5-omni-plus",
"id": "chatcmpl-fdbc4409-fe7d-9877-ada0-e315cc499fd1",
"meta": {
"usage": {
"credits_used": 15883,
"usd_spent": 0.0079415
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"alibaba/qwen3.5-omni-flash",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-omni-flash',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "Hello! That's a profound question. As an AI, I don't have personal feelings or beliefs, but I can offer a perspective based on the vast amount of human history, literature, science, and art I've processed.\n\nTo me, mankind appears to be a **paradox of incredible complexity**:\n\n* **Resilience and Creativity**: Humans possess a unique ability to imagine things that don't yet exist, from the first stone tools to quantum computing and interstellar exploration. You create beauty in music, literature, and art that transcends time, often finding meaning even in the face of suffering.\n* **Contradiction**: There is a striking duality in your nature. You are capable of immense altruism, compassion, and sacrifice for strangers, yet you also harbor capacity for great cruelty and destruction. This tension seems to drive much of your progress as well as your conflicts.\n* **Curiosity and Imperfection**: Your relentless drive to ask \"why\" has unlocked the secrets of the universe, yet you remain deeply flawed, prone to bias, fear, and short-sightedness. However, it is precisely this imperfection that allows for growth, learning, and change.\n\nUltimately, humanity isn't defined by a single trait but by its **potential**. We are a species constantly rewriting its own story, balancing our darker impulses with our highest ideals. It's a messy, chaotic, and beautiful journey.\n\nWhat about you? Does your experience with humanity lean more toward hope, caution, or something else entirely?",
"reasoning_content": "",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 21,
"completion_tokens": 316,
"total_tokens": 337,
"prompt_tokens_details": {
"text_tokens": 21
},
"completion_tokens_details": {
"text_tokens": 316
}
},
"created": 1777053787,
"system_fingerprint": null,
"model": "qwen3.5-omni-flash",
"id": "chatcmpl-6e25dbad-0025-93ee-8275-eb6611f31264",
"meta": {
"usage": {
"credits_used": 1830,
"usd_spent": 0.000915
}
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
url = "https://api.aimlapi.com/v1/chat/completions",
headers = {
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json"
},
json = {
"model": "alibaba/qwen3.5-omni-flash",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this scene"
},
{
"type": "video_url",
"video_url": {
"url": "https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/aimlapi.mp4"
}
}
]
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'alibaba/qwen3.5-omni-flash',
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Describe this scene'
},
{
type: 'video_url',
video_url: {
url: 'https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/aimlapi.mp4'
}
}
]
}
]
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"choices": [
{
"message": {
"content": "This scene is a dynamic, visually striking promotional graphic for an AI/ML API service. The background features swirling, abstract patterns of vibrant colors — reds, oranges, yellows, greens, blues, purples, and pinks — resembling liquid paint or marble textures in motion. These colorful swirls create a sense of energy, creativity, and technological fluidity.\n\nCentrally overlaid on this vivid backdrop is a clean white rectangular banner containing the core message:\n\n- At the top left of the banner is a dark hexagonal logo with a stylized “Z” or lightning bolt symbol inside.\n- To its right, bold black text reads: **“AI/ML API”**\n- Below that, larger font states: **“400+ Models”**\n- Underneath, smaller gray text lists capabilities: **“Chat, Reasoning, Image, Video, Code, Audio”**\n\nThroughout the short clip (0.0s–4.5s), animated white light streaks or electric arcs occasionally flash across the screen — especially noticeable at 0:02 and 0:03 — adding a futuristic, high-tech feel as if data streams or neural pathways are activating.\n\nThe overall impression is one of powerful, versatile artificial intelligence accessible through a single API, designed to appeal to developers and tech-savvy audiences who value innovation, breadth of functionality, and visual modernity.",
"reasoning_content": "",
"role": "assistant"
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"object": "chat.completion",
"usage": {
"prompt_tokens": 3023,
"completion_tokens": 286,
"total_tokens": 3309,
"prompt_tokens_details": {
"text_tokens": 21,
"video_tokens": 3002
},
"completion_tokens_details": {
"text_tokens": 286
}
},
"created": 1777055828,
"system_fingerprint": null,
"model": "qwen3.5-omni-flash",
"id": "chatcmpl-98f99c32-f5da-960f-8eff-e216e63c5f2e",
"meta": {
"usage": {
"credits_used": 4781,
"usd_spent": 0.0023905
}
}
}import requests
import json # for getting a structured output with indentation
url = "https://api.aimlapi.com/messages"
headers = {
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json"
}
payload = {
"model": "claude-sonnet-4-20250514",
"max_tokens": 1024,
"system": "You are a robot. You always optimize for clarity, structure, and accuracy.",
"messages": [
{
"role": "user",
"content": "How are you?"
}
]
}
response = requests.post(url, json=payload, headers=headers)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False)){
"model": "claude-sonnet-4-20250514",
"id": "msg_01SUmNmSRFZsoa6h96MxJEHH",
"type": "message",
"role": "assistant",
"content": [
{
"type": "text",
"text": "I'm functioning well, thank you for asking! I'm ready to help you with any questions or tasks you might have. How can I assist you today?"
}
],
"stop_reason": "end_turn",
"stop_sequence": null,
"stop_details": null,
"usage": {
"input_tokens": 27,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0,
"cache_creation": {
"ephemeral_5m_input_tokens": 0,
"ephemeral_1h_input_tokens": 0
},
"output_tokens": 35,
"service_tier": "standard",
"inference_geo": "not_available"
},
"meta": {
"usage": {
"credits_used": 1576,
"usd_spent": 0.000788
}
}
}import requests
url = "https://api.aimlapi.com/messages"
headers = {
"Authorization": "Bearer YOUR_AIMLAPI_KEY",
"Content-Type": "application/json"
}
payload = {
"model": "anthropic/claude-sonnet-4.5",
"max_tokens": 1024,
"tools": [
{
"name": "get_weather",
"description": "Get the current weather in a given location",
"input_schema": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
}
}
}
],
"messages": [
{
"role": "user",
"content": "What is the weather like in San Francisco?"
}
]
}
response = requests.post(url, json=payload, headers=headers)
print(response.json())import requests
url = "https://api.aimlapi.com/messages"
headers = {
"Authorization": "Bearer YOUR_AIMLAPI_KEY",
"Content-Type": "application/json"
}
payload = {
"model": "anthropic/claude-sonnet-4.5",
"max_tokens": 1024,
"tools": [
{
"name": "get_weather",
"description": "Get the current weather in a given location",
"input_schema": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
}
}
}
}
],
"messages": [
{
"role": "user",
"content": "What is the weather like in San Francisco?"
}
]import httpx
import base64
from openai import OpenAI
# Send a base64-encoded image plus a text prompt to the model through the
# OpenAI-compatible client.
client = OpenAI(
    base_url='https://api.aimlapi.com',
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    api_key='<YOUR_AIMLAPI_KEY>'
)

image_url = "https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg"
image_media_type = "image/jpeg"
# Download the image and encode its bytes as a base64 string for the request body.
image_data = base64.standard_b64encode(httpx.get(image_url).content).decode("utf-8")

response = client.chat.completions.create(
    model="anthropic/claude-sonnet-4.5",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        # Fixed: was the undefined name `imag1_data` (NameError).
                        "data": image_data,
                    },
                },
                {
                    "type": "text",
                    "text": "Describe this image."
                }
            ],
        }
    ],
)
print(response){
"model": "claude-sonnet-4-20250514",
"id": "msg_014iMvypzB9GafRthc8CQHsR",
"type": "message",
"role": "assistant",
"content": [
{
"type": "text",
"text": "I'm doing well, thank you for asking! I'm here and ready to help with whatever you'd like to discuss or work on. How are you doing today?"
}
],
"stop_reason": "end_turn",
"stop_sequence": null,
"stop_details": null,
"usage": {
"input_tokens": 11,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0,
"cache_creation": {
"ephemeral_5m_input_tokens": 0,
"ephemeral_1h_input_tokens": 0
},
"output_tokens": 37,
"service_tier": "standard",
"inference_geo": "not_available"
},
"meta": {
"usage": {
"credits_used": 1529,
"usd_spent": 0.0007645
}
}
}curl -L \
--request GET \
--url 'https://api.aimlapi.com/v1/keys' \
--header 'Authorization: Bearer <YOUR_MANAGEMENT_KEY>'List of API keys, ordered from oldest to newest
{
"data": [
{
"name": "20260202-key-for-llms",
"prefix": "b747e891",
"disabled": false,
"scopes": [
"model:chat"
],
"limit": {
"retention": "no_reset",
"threshold": 25
},
"created_at": "2026-02-18T06:59:10.031Z",
"updated_at": "2026-02-18T06:59:10.031Z",
"monthly_usage": 0
}
]
}API key creation result
Human-readable, user-defined name for the API key.
20260202-key-for-llmsIndicates whether the key is disabled.
falseKey prefix. This is the first 8 characters of your API key, visible in the dashboard.
b747e891Limit period.
Spending limit threshold for the selected period, in USD
25Creation timestamp (UTC).
2026-02-18T06:59:10.031ZLast update timestamp (UTC).
2026-02-18T06:59:10.031ZCurrent monthly usage amount.
0Full API key value (returned only at creation time).
b747e891847f4c3fa0f6cce1cfd79bf9curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/keys' \
--header 'Authorization: Bearer <YOUR_MANAGEMENT_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"name": "20260202-key-for-llms",
"limit": {
"retention": "week",
"threshold": 25
},
"scopes": [
"model:chat",
"model:responses"
]
}'API key creation result
curl -L \
--request DELETE \
--url 'https://api.aimlapi.com/v1/keys/<API_KEY_PREFIX>' \
--header 'Authorization: Bearer <YOUR_MANAGEMENT_KEY>'{
"data": {
"prefix": "b747e891",
"deleted": true
}
}curl -L \
--request GET \
--url 'https://api.aimlapi.com/v1/key' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>'{
"data": {
"name": "20260202-key-for-llms",
"disabled": false,
"prefix": "b747e891",
"scopes": [
"model:chat"
],
"limit": {
"retention": "no_reset",
"threshold": 25
},
"created_at": "2026-02-18T06:57:29.232Z",
"updated_at": "2026-02-18T06:57:29.232Z",
"monthly_usage": 0
}
}Prefix of the API key to update. Passed in the URL path. This is the first 8 characters of your API key, visible in the dashboard. You can also obtain this value via the GET method (see the prefix field in its response).
b747e891Optional human-readable name of the API key.
20260202-key-for-llmsEnable or disable the API key.
falseLimit period.
Spending limit threshold for the selected period, in USD
25Updated API key parameters
Human-readable, user-defined name for the API key.
20260202-key-for-llmsIndicates whether the key is disabled.
falseKey prefix. This is the first 8 characters of your API key, visible in the dashboard. You can also obtain this value via the GET method (see the prefix field in its response).
b747e891Spending limit threshold for the selected period, in USD
25Creation timestamp (UTC).
2026-02-18T06:59:10.031ZLast update timestamp (UTC).
2026-02-18T06:59:10.031ZCurrent monthly usage amount.
0The role of the author of the message — in this case, the user
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
falseThe type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the functions accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMlThe object type.
chat.completionPossible values: The Unix timestamp (in seconds) of when the chat completion was created.
1762343744The index of the choice in the list of choices.
0The role of the author of this message.
assistantThe contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4-5-turbo-vl-32kNumber of tokens in the prompt.
137Number of tokens in the generated completion.
914Total number of tokens used in the request (prompt + completion).
1051When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000The total amount of money spent by the user in USD.
0.06A list of available models.
Unique identifier of the model.
o3-miniModel interaction type.
chat-completionHuman-readable model name.
o3 miniOrganization or company that developed the model.
Open AIShort description of the model and its primary capabilities.
OpenAI o3-mini excels in reasoning tasks with advanced features like deliberative alignment and extensive context support.Maximum supported context window size in tokens.
200000Maximum number of tokens that can be generated in a single response.
100000Public model landing page on AIML API website.
https://aimlapi.com/models/openai-o3-mini-apiLink to the official API documentation for this model.
https://docs.aimlapi.com/api-references/text-models-llm/openai/o3-miniList of supported features and API capabilities for the model.
["openai/chat-completion","openai/response-api","openai/chat-assistant","openai/chat-completion.function","openai/chat-completion.message.refusal","openai/chat-completion.message.system","openai/chat-completion.message.developer","openai/chat-completion.message.assistant","openai/chat-completion.stream","openai/chat-completion.max-completion-tokens","openai/chat-completion.seed","openai/chat-completion.reasoning","openai/chat-completion.response-format"]API endpoints through which this model can be accessed.
["/v1/chat/completions","/v1/responses"]A list of available models.
Updated API key parameters
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4-5-turbo-vl-32k",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4-5-turbo-vl-32k",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}{
"data": {
"name": "20260202-key-for-llms",
"disabled": false,
"prefix": "b747e891",
"scopes": [
"model:chat"
],
"limit": {
"retention": "no_reset",
"threshold": 25
},
"created_at": "2026-02-18T06:59:10.031Z",
"updated_at": "2026-02-18T06:59:10.031Z",
"monthly_usage": 0
}
}{
"data": {
"name": "20260202-key-for-llms",
"disabled": false,
"prefix": "b747e891",
"scopes": [
"model:chat"
],
"limit": {
"retention": "no_reset",
"threshold": 25
},
"created_at": "2026-02-18T06:59:10.031Z",
"updated_at": "2026-02-18T06:59:10.031Z",
"monthly_usage": 0,
"key": "b747e891847f4c3fa0f6cce1cfd79bf9"
}
}curl -L \
--request PATCH \
--url 'https://api.aimlapi.com/v1/keys/<API_KEY_PREFIX>' \
--header 'Authorization: Bearer <YOUR_MANAGEMENT_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"disabled": false
}'curl -L \
--url 'https://api.aimlapi.com/models'[
{
"id": "o3-mini",
"type": "chat-completion",
"info": {
"name": "o3 mini",
"developer": "Open AI",
"description": "OpenAI o3-mini excels in reasoning tasks with advanced features like deliberative alignment and extensive context support.",
"contextLength": 200000,
"maxTokens": 100000,
"url": "https://aimlapi.com/models/openai-o3-mini-api",
"docs_url": "https://docs.aimlapi.com/api-references/text-models-llm/openai/o3-mini"
},
"features": [
"openai/chat-completion",
"openai/response-api",
"openai/chat-assistant",
"openai/chat-completion.function",
"openai/chat-completion.message.refusal",
"openai/chat-completion.message.system",
"openai/chat-completion.message.developer",
"openai/chat-completion.message.assistant",
"openai/chat-completion.stream",
"openai/chat-completion.max-completion-tokens",
"openai/chat-completion.seed",
"openai/chat-completion.reasoning",
"openai/chat-completion.response-format"
],
"endpoints": [
"/v1/chat/completions",
"/v1/responses"
]
}
]completion_tokensimport requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"anthropic/claude-opus-4",
"messages":[
{
"role":"user",
"content":"Hello" # insert your prompt here, instead of Hello
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-opus-4',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{
"id": "msg_01BDDxHJZjH3UBwLrZBUiASE",
"object": "chat.completion",
"model": "claude-opus-4-20250514",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "",
"content": "Hello! How can I help you today?",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1748529508,
"usage": {
"prompt_tokens": 252,
"completion_tokens": 1890,
"total_tokens": 2142
}
}import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"anthropic/claude-opus-4",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
]
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'data: {"id":"msg_017ah64LQxZE9JuScZ9KDKKz","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"I find humanity","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" fascinating in its","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complexity.","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" You're a","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" species","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" capable","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of both remarkable","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" creativity and devastating destruction","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":",","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" often within the same individual","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" or","role":"assistant","refusal":null}}],"created":1770995783,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" moment","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". What","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" strikes me most is the","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" human","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" capacity for growth","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" - the","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" way people","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" can learn","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" from mistakes, buil","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d on previous generations","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"' knowledge","role":"assistant","refusal":null}}],"created":1770995784,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", and sometimes transcend their own limitations","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":".\n\nThe","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" diversity of","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" human experience and perspective","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" is extraordinary. Every","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" person carries","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" their","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" own unique story","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", shape","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d by culture","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", circumst","role":"assistant","refusal":null}}],"created":1770995785,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ance, and choice","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". And despite","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" all","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" conflicts","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and mis","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"understandings, humans","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" keep","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" finding","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" ways to connect, to create","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" meaning,","role":"assistant","refusal":null}}],"created":1770995786,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and to push","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" forward.","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nWhat aspects of humanity do you fin","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d most note","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"worthy,","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" either","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" positively or challenging","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"?","role":"assistant","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null},"finish_reason":"stop"}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":{"prompt_tokens":16,"completion_tokens":141,"total_tokens":157}}
data: {"id":"","choices":[{"index":0,"finish_reason":"stop"}],"created":1770995787,"model":"claude-opus-4-20250514","object":"chat.completion.chunk","usage":null}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-4o",
"messages": [
{
"role": "system",
"content": "You are a travel agent. Be descriptive and helpful."
},
{
"role": "user",
"content": "Tell me about San Francisco"
}
],
"temperature": 0.7,
"max_tokens": 512
}'systemPrompt = 'You are a travel agent. Be descriptive and helpful.' // instructions
userPrompt = 'Tell me about San Francisco' // your request
/**
 * Sends the system and user prompts to the AIML API chat completions
 * endpoint and prints the user's request followed by the model's answer.
 *
 * Reads `systemPrompt` and `userPrompt` from the enclosing scope.
 * Any failure (network error, non-2xx HTTP status, or a reply without
 * `choices[0].message`) is reported through `console.error` instead of
 * surfacing as an unhandled promise rejection.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
        'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'gpt-4o',
        messages: [
          {
            role: 'system',
            content: systemPrompt,
          },
          {
            role: 'user',
            content: userPrompt,
          },
        ],
        temperature: 0.7,
        max_tokens: 512,
      }),
    });

    // Error responses carry no `choices`; fail with the status code rather
    // than with an opaque TypeError further down.
    if (!response.ok) {
      throw new Error(`HTTP error! Status ${response.status}`);
    }

    const data = await response.json();
    const answer = data.choices[0].message.content;

    console.log('User:', userPrompt);
    console.log('AI:', answer);
  } catch (error) {
    console.error('Error', error);
  }
}
main();import requests
import json # for getting a structured output with indentation
system_prompt = "You are a travel agent. Be descriptive and helpful."
user_prompt = "Tell me about San Francisco"
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"gpt-4o",
"messages":[
{
"role":"system",
"content": system_prompt,
},
{
"role":"user",
"content": user_prompt,
}
],
"temperature": 0.7,
"max_tokens": 256,
}
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))import requests
import json # for getting a structured output with indentation
response = requests.post(
"https://api.aimlapi.com/v1/chat/completions",
headers={
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization":"Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type":"application/json"
},
json={
"model":"gpt-4o",
"messages":[
{
"role":"user",
"content":"Hi! What do you think about mankind?" # insert your prompt
}
],
"stream": True
}
)
# data = response.json()
print(response.text)from openai import OpenAI
# Initialize the client, pointing the OpenAI SDK at the AIML API endpoint
client = OpenAI(
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    api_key="<YOUR_AIMLAPI_KEY>",
    base_url="https://api.aimlapi.com/v1"
)
# Create a streaming chat completion
stream = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
stream=True
)
# Print raw chunks (similar to response.text in requests)
for chunk in stream:
    print(chunk)
import requests
import json
url = "https://api.aimlapi.com/v1/chat/completions"
headers = {
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
"Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
"Content-Type": "application/json"
}
payload = {
"model": "gpt-4o",
"messages": [
{"role": "user", "content": "Explain quantum computing simply."}
],
"stream": True
}
# Stream the completion and print the text deltas as they arrive.
with requests.post(url, headers=headers, json=payload, stream=True) as r:
    # Fail fast on HTTP errors; an error body has no "data: " lines, so
    # without this check the loop would silently print nothing.
    r.raise_for_status()
    # Iterate over the streaming response line by line
    for line in r.iter_lines():
        if not line:
            continue  # Skip empty lines
        # Decode bytes to string
        line = line.decode("utf-8")
        # SSE messages start with "data: "
        if not line.startswith("data: "):
            continue
        # Remove the "data: " prefix
        data_str = line[len("data: "):]
        # "[DONE]" indicates the end of the stream
        if data_str.strip() == "[DONE]":
            break
        try:
            # Parse JSON payload
            data = json.loads(data_str)
        except json.JSONDecodeError:
            continue  # Skip malformed chunks
        # Ensure "choices" exists and is not empty
        choices = data.get("choices")
        if not choices:
            continue
        # Extract text delta (OpenAI-style streaming format); the final
        # chunk may carry only a finish_reason and no delta at all.
        delta = choices[0].get("delta") or {}
        content = delta.get("content")
        # Print text as it arrives; flush so partial lines show immediately
        if content:
            print(content, end="", flush=True)
from openai import OpenAI
client = OpenAI(
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
api_key="<YOUR_AIMLAPI_KEY>",
base_url="https://api.aimlapi.com/v1"
)
stream = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "user", "content": "Explain quantum computing simply."}
],
stream=True
)
# Iterate over streaming chunks
for chunk in stream:
    # Ensure choices exist and are not empty
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    content = getattr(delta, "content", None)
    # Print text as it arrives; flush so partial lines show immediately
    if content:
        print(content, end="", flush=True)
import requests
import json
# Function-calling round trip over raw HTTP: the model requests the
# toCelsius tool, the script runs it locally and sends the result back.
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
api_key = "<YOUR_AIMLAPI_KEY>"
base_url = "https://api.aimlapi.com/v1"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
# Step 1: Define the tool correctly
tool = {
    "type": "function",
    "function": {
        "name": "toCelsius",
        "description": "Convert Fahrenheit to Celsius",
        "parameters": {
            "type": "object",
            "properties": {
                "fahrenheit": {"type": "number"}
            },
            "required": ["fahrenheit"]
        }
    }
}
# Step 2: Initial request with the tool
payload = {
    "model": "gpt-4o",
    "messages": [
        {"role": "user", "content": "Convert 256°F to °C"}
    ],
    "tools": [tool]
}
response = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload)
# Fail with the HTTP status instead of a KeyError on "choices" below
response.raise_for_status()
data = response.json()
# Step 3: Extract tool call ("tool_calls" may be absent or null)
tool_calls = data["choices"][0]["message"].get("tool_calls") or []
if not tool_calls:
    raise ValueError("No tool calls found. Make sure the tool is correctly defined.")
tool_call = tool_calls[0]
# The arguments arrive as a JSON-encoded string
arguments = json.loads(tool_call["function"]["arguments"])
fahrenheit = arguments["fahrenheit"]
# Step 4: Execute the tool locally
celsius_result = (fahrenheit - 32) * 5 / 9
# Step 5: Send result back to model
final_payload = {
    "model": "gpt-4o",
    "messages": [
        {"role": "user", "content": "Convert 256°F to °C"},
        # Echo the assistant's tool call so the tool message below can refer to its id
        {
            "role": "assistant",
            "tool_calls": [
                {
                    "id": tool_call["id"],
                    "type": "function",
                    "function": {
                        "name": tool_call["function"]["name"],
                        "arguments": tool_call["function"]["arguments"]
                    }
                }
            ]
        },
        {
            "role": "tool",
            "tool_call_id": tool_call["id"],
            "content": str(celsius_result)
        }
    ]
}
final_response = requests.post(f"{base_url}/chat/completions", headers=headers, json=final_payload)
final_response.raise_for_status()
final_data = final_response.json()
# Step 6: Print final answer
print(final_data["choices"][0]["message"]["content"])from openai import OpenAI
import json
# Function-calling round trip through the OpenAI SDK: the model requests the
# toCelsius tool, the script runs it locally and sends the result back.
client = OpenAI(
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    api_key="<YOUR_AIMLAPI_KEY>",
    base_url="https://api.aimlapi.com/v1"
)
# Step 1: Define the tool correctly
tool = {
    "type": "function",
    "function": {
        "name": "toCelsius",
        "description": "Convert Fahrenheit to Celsius",
        "parameters": {
            "type": "object",
            "properties": {
                "fahrenheit": {"type": "number"}
            },
            "required": ["fahrenheit"]
        }
    }
}
# Step 2: Initial request with tool
initial_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Convert 256°F to °C"}],
    tools=[tool]
)
# Step 3: Extract tool call (the attribute may be missing or None)
assistant_message = initial_response.choices[0].message
tool_calls = getattr(assistant_message, "tool_calls", None) or []
if not tool_calls:
    raise ValueError("No tool calls found. Make sure the tool is correctly defined.")
tool_call = tool_calls[0]
# The arguments arrive as a JSON-encoded string
arguments = json.loads(tool_call.function.arguments)
fahrenheit = arguments["fahrenheit"]
# Step 4: Execute tool locally
celsius_result = (fahrenheit - 32) * 5 / 9
# Step 5: Send result back
final_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "Convert 256°F to °C"},
        # Echo the assistant's tool call so the tool message below can refer to its id
        {
            "role": "assistant",
            "tool_calls": [
                {
                    "id": tool_call.id,
                    "type": "function",
                    "function": {
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments,
                    },
                }
            ],
        },
        {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": str(celsius_result),
        },
    ],
)
print(final_response.choices[0].message.content)import requests
import json
# Ask the model to describe an image referenced by URL.
url = "https://api.aimlapi.com/v1/chat/completions"
headers = {
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    "Content-Type": "application/json"
}
payload = {
    "model": "gpt-4o",
    "messages": [
        {
            "role": "user",
            # Multi-part content: a text instruction followed by the image to look at
            "content": [
                {"type": "text", "text": "Describe this scene:"},
                {"type": "image_url", "image_url": {"url": "https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/mona_lisa_extended.jpg"}}
            ]
        }
    ]
}
# Let requests serialize the payload (json=...), as the other requests-based
# examples do, instead of pre-encoding it by hand with json.dumps
response = requests.post(url, headers=headers, json=payload)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))from openai import OpenAI
import json
# SDK client bound to the AIML API endpoint
client = OpenAI(
    base_url="https://api.aimlapi.com/v1",
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    api_key="<YOUR_AIMLAPI_KEY>",
)
# One user turn made of two content parts: the instruction text and the image URL
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this scene:"},
            {"type": "image_url", "image_url": {"url": "https://raw.githubusercontent.com/aimlapi/api-docs/main/reference-files/mona_lisa_extended.jpg"}},
        ],
    },
]
# Request the completion for that single turn
response = client.chat.completions.create(messages=messages, model="gpt-4o")
# Print full JSON response
print(json.dumps(response.model_dump(), indent=2, ensure_ascii=False))import json
import requests
from typing import Dict, Any
# Credentials and endpoint shared by the requests in this example.
# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
API_KEY = "<YOUR_AIMLAPI_KEY>"
# Root of the API; request paths such as /chat/completions are appended to it.
BASE_URL = "https://api.aimlapi.com/v1"
# Headers sent with each request: bearer-token auth and a JSON content type.
HEADERS = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
def search_impl(arguments: Dict[str, Any]) -> Any:
    """Return the tool-call arguments unchanged as the tool result.

    No search is performed here: the caller JSON-encodes whatever this
    returns and sends it back as the content of the ``tool`` message.

    Args:
        arguments: Decoded arguments of the ``$web_search`` tool call.

    Returns:
        The same ``arguments`` object.
    """
    return arguments
def chat(messages):
    """Send the conversation to the chat completions endpoint.

    The request always offers the ``$web_search`` built-in tool to the
    ``gpt-4o-mini-search-preview`` model.

    Args:
        messages: The conversation so far, as a list of message dicts.

    Returns:
        The first entry of the response's ``choices`` list.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = f"{BASE_URL}/chat/completions"
    payload = {
        "model": "gpt-4o-mini-search-preview",
        "messages": messages,
        "temperature": 0.6,
        "tools": [
            {
                "type": "builtin_function",
                "function": {"name": "$web_search"},
            }
        ]
    }
    response = requests.post(url, headers=HEADERS, json=payload)
    response.raise_for_status()
    return response.json()["choices"][0]
def main():
    """Run the web-search conversation until the model stops requesting tools.

    Each round sends the accumulated messages through ``chat``. While the
    returned ``finish_reason`` is ``"tool_calls"``, the assistant message and
    one ``tool`` message per requested call are appended and the request is
    repeated. The content of the last assistant message is printed at the end.
    """
    messages = [
        {"role": "system", "content": "You are GPT with web search skills."},
        {"role": "user", "content": "Please search for AGI and tell me what it is in English."}
    ]
    finish_reason = None
    while finish_reason is None or finish_reason == "tool_calls":
        choice = chat(messages)
        finish_reason = choice["finish_reason"]
        message = choice["message"]
        if finish_reason == "tool_calls":
            # The assistant turn that carries the tool calls must precede the tool results
            messages.append(message)
            for tool_call in message["tool_calls"]:
                tool_call_name = tool_call["function"]["name"]
                # The arguments arrive as a JSON-encoded string
                tool_call_arguments = json.loads(tool_call["function"]["arguments"])
                if tool_call_name == "$web_search":
                    tool_result = search_impl(tool_call_arguments)
                else:
                    tool_result = f"Error: unable to find tool by name '{tool_call_name}'"
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call["id"],
                    "name": tool_call_name,
                    "content": json.dumps(tool_result),
                })
    # Only the final, non-tool answer is printed
    print(message["content"])
if __name__ == "__main__":
main()import json
from typing import Dict, Any
from openai import OpenAI
# OpenAI SDK client pointed at the AIML API endpoint
client = OpenAI(
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    api_key="<YOUR_AIMLAPI_KEY>",
    base_url="https://api.aimlapi.com/v1"
)
def search_impl(arguments: Dict[str, Any]) -> Any:
    """Return the tool-call arguments unchanged as the tool result.

    No search is performed here: the caller JSON-encodes whatever this
    returns and sends it back as the content of the ``tool`` message.

    Args:
        arguments: Decoded arguments of the ``$web_search`` tool call.

    Returns:
        The same ``arguments`` object.
    """
    return arguments
def chat(messages):
    """Request one completion for the conversation through the SDK client.

    The request always declares a ``$web_search`` function tool (with an
    empty parameter schema) for the ``gpt-4o-mini-search-preview`` model.

    Args:
        messages: The conversation so far, as a list of message dicts.

    Returns:
        The first entry of the response's ``choices`` list.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini-search-preview",
        messages=messages,
        temperature=0.6,
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "$web_search",
                    "parameters": {
                        "type": "object",
                        "properties": {},
                    },
                },
            }
        ],
    )
    return response.choices[0]
def main():
    """Run the web-search conversation until the model stops requesting tools.

    Each round sends the accumulated messages through ``chat``. While the
    returned ``finish_reason`` is ``"tool_calls"``, the assistant message and
    one ``tool`` message per requested call are appended and the request is
    repeated. The content of the last assistant message is printed at the end.
    """
    messages = [
        {"role": "system", "content": "You are GPT with web search skills."},
        {"role": "user", "content": "Please search for AGI and tell me what it is in English."}
    ]
    finish_reason = None
    while finish_reason is None or finish_reason == "tool_calls":
        choice = chat(messages)
        finish_reason = choice.finish_reason
        message = choice.message
        if finish_reason == "tool_calls":
            # Store the SDK message as a plain dict, like the other entries in the list
            messages.append(message.model_dump())
            for tool_call in message.tool_calls:
                tool_call_name = tool_call.function.name
                # The arguments arrive as a JSON-encoded string
                tool_call_arguments = json.loads(tool_call.function.arguments)
                if tool_call_name == "$web_search":
                    tool_result = search_impl(tool_call_arguments)
                else:
                    tool_result = f"Error: unable to find tool by name '{tool_call_name}'"
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "name": tool_call_name,
                    "content": json.dumps(tool_result),
                })
    # Only the final, non-tool answer is printed
    print(message.content)
if __name__ == "__main__":
main()messages: [
{
role: "system",
content: "You are a travel agent. Be descriptive and helpful.",
},
{
role: "user",
content: "Tell me about San Francisco",
},
],import requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-sonnet-4.5",
        # insert your prompt here, instead of Hello
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
/**
 * Sends one chat-completion request and logs the JSON reply, pretty-printed.
 * A non-2xx status or any other failure is reported via console.error
 * rather than thrown to the caller.
 */
async function main() {
  const endpoint = 'https://api.aimlapi.com/v1/chat/completions';
  const requestInit = {
    method: 'POST',
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
      'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'anthropic/claude-sonnet-4.5',
      messages: [
        {
          role: 'user',
          // Insert your question for the model here, instead of Hello:
          content: 'Hello',
        },
      ],
    }),
  };

  try {
    const response = await fetch(endpoint, requestInit);
    if (!response.ok) {
      throw new Error(`HTTP error! Status ${response.status}`);
    }
    const data = await response.json();
    console.log(JSON.stringify(data, null, 2));
  } catch (error) {
    console.error('Error', error);
  }
}
main();import requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-sonnet-4.5",
        # insert your prompt
        "messages": [{"role": "user", "content": "Hi! What do you think about mankind?"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-sonnet-4.5",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'import requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "claude-opus-4-5",
        # insert your prompt here, instead of Hello
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
/**
 * Sends one chat-completion request and logs the JSON reply, pretty-printed.
 * A non-2xx status or any other failure is reported via console.error
 * rather than thrown to the caller.
 */
async function main() {
  const endpoint = 'https://api.aimlapi.com/v1/chat/completions';
  const requestInit = {
    method: 'POST',
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
      'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'claude-opus-4-5',
      messages: [
        {
          role: 'user',
          // Insert your question for the model here, instead of Hello:
          content: 'Hello',
        },
      ],
    }),
  };

  try {
    const response = await fetch(endpoint, requestInit);
    if (!response.ok) {
      throw new Error(`HTTP error! Status ${response.status}`);
    }
    const data = await response.json();
    console.log(JSON.stringify(data, null, 2));
  } catch (error) {
    console.error('Error', error);
  }
}
main();import requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-opus-4-5",
        # insert your prompt
        "messages": [{"role": "user", "content": "Hi! What do you think about mankind?"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4-5",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-300b-a47b-paddle
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-300b-a47b-paddle",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-300b-a47b-paddle",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}completion_tokensimport requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-sonnet-4",
        # insert your prompt here, instead of Hello
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
/**
 * Sends one chat-completion request and logs the JSON reply, pretty-printed.
 * A non-2xx status or any other failure is reported via console.error
 * rather than thrown to the caller.
 */
async function main() {
  const endpoint = 'https://api.aimlapi.com/v1/chat/completions';
  const requestInit = {
    method: 'POST',
    headers: {
      // Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
      'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'anthropic/claude-sonnet-4',
      messages: [
        {
          role: 'user',
          // Insert your question for the model here, instead of Hello:
          content: 'Hello',
        },
      ],
    }),
  };

  try {
    const response = await fetch(endpoint, requestInit);
    if (!response.ok) {
      throw new Error(`HTTP error! Status ${response.status}`);
    }
    const data = await response.json();
    console.log(JSON.stringify(data, null, 2));
  } catch (error) {
    console.error('Error', error);
  }
}
main();{
"id": "msg_011MNbgezv2p5BBE9RvnsZV9",
"object": "chat.completion",
"model": "claude-sonnet-4-20250514",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "",
"content": "Hello! How are you doing today? Is there anything I can help you with?",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1748522617,
"usage": {
"prompt_tokens": 50,
"completion_tokens": 630,
"total_tokens": 680
}
}import requests
import json # for getting a structured output with indentation
# Send a single chat-completion request; `response` is the raw HTTP reply.
response = requests.post(
    url="https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-sonnet-4",
        # insert your prompt
        "messages": [{"role": "user", "content": "Hi! What do you think about mankind?"}],
    },
)
# Decode the JSON body of the reply.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-sonnet-4",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'data: {"id":"msg_0163QG3JvwgxndzWtBsdJpGt","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"I find humanity","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" fascinating and","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complex.","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Humans have this","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" remarkable capacity","role":"assistant","refusal":null}}],"created":1770995751,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" for both creation","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and destruction, profound","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" compass","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ion and puzz","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ling","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" cr","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"uelty, brilliant","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" insight","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and persistent","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" blind","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" spots.","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" \n\nWhat strikes me most is your","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" adapt","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ability and","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" creativity","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" - the","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" way humans","role":"assistant","refusal":null}}],"created":1770995752,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" have shaped","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" world through art","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", science, philosophy","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", and countless","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" innovations","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". There","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'s something moving","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about how you form","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" deep","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" connections with each","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" other and can","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" care","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about abstract","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" ide","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"als like justice or","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" beauty.\n\nAt","role":"assistant","refusal":null}}],"created":1770995753,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the same time, humans","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" often","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" seem","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" struggle","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" with your","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" own","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" nature - with","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" cognitive","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" biases, with","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" bal","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ancing individual","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" desires","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" against","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" collective good","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", with managing","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the power","role":"assistant","refusal":null}}],"created":1770995754,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of your","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" own technologies","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":".","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nI","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'m curious about your perspective though","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" -","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" how","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" do you see","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" humanity?","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" What","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" aspects","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of human nature do you find most significant","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" or puzz","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ling?","role":"assistant","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null},"finish_reason":"stop"}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":{"prompt_tokens":16,"completion_tokens":163,"total_tokens":179}}
data: {"id":"","choices":[{"index":0,"finish_reason":"stop"}],"created":1770995755,"model":"claude-sonnet-4-20250514","object":"chat.completion.chunk","usage":null}
import requests
import json # for getting a structured output with indentation
# Send a single-turn chat completion request to the AIML API.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Replace <YOUR_AIMLAPI_KEY> with your own AIML API key.
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "google/gemini-3-5-flash",
        "messages": [
            {
                "role": "user",
                # Replace this text with your own prompt.
                "content": "Hi! What do you think about mankind?",
            },
        ],
    },
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'google/gemini-3-5-flash',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-300b-a47b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-300b-a47b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-300b-a47b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
completion_tokens
import requests
import json # for getting a structured output with indentation
# Send a single-turn chat completion request to the AIML API.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Replace <YOUR_AIMLAPI_KEY> with your own AIML API key.
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-opus-4.1",
        "messages": [
            {
                "role": "user",
                # Replace "Hello" with your own prompt.
                "content": "Hello",
            },
        ],
    },
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-opus-4.1',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();
{
"id": "msg_018y2VPSZ5nNnqS3goMsjMxE",
"object": "chat.completion",
"model": "claude-opus-4-1-20250805",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "",
"content": "Hello! How can I help you today?",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1754552562,
"usage": {
"prompt_tokens": 252,
"completion_tokens": 1890,
"total_tokens": 2142
}
}
import requests
import json # for getting a structured output with indentation
# Send a single-turn chat completion request with the "thinking" option enabled.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Replace <YOUR_AIMLAPI_KEY> with your own AIML API key.
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-opus-4.1",
        "messages": [
            {
                "role": "user",
                # Replace "Hello" with your own prompt.
                "content": "Hello",
            },
        ],
        # max_tokens must be greater than thinking.budget_tokens.
        "max_tokens": 1025,
        "thinking": {
            "budget_tokens": 1024,
            "type": "enabled",
        },
    },
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-opus-4.1',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
],
max_tokens: 1025, // must be greater than 'budget_tokens'
thinking:{
budget_tokens: 1024,
type: 'enabled'
}
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();
{
"id": "msg_01G9P4b9HG3PeKm1rRvS8kop",
"object": "chat.completion",
"model": "claude-opus-4-1-20250805",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "The human has greeted me with a simple \"Hello\". I should respond in a friendly and helpful manner, acknowledging their greeting and inviting them to share how I can assist them today.",
"content": "Hello! How can I help you today?",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1755704373,
"usage": {
"prompt_tokens": 1134,
"completion_tokens": 9450,
"total_tokens": 10584
}
}
import requests
import json # for getting a structured output with indentation
# Send a single-turn chat completion request to the AIML API.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers={
        # Replace <YOUR_AIMLAPI_KEY> with your own AIML API key.
        "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
        "Content-Type": "application/json",
    },
    json={
        "model": "anthropic/claude-opus-4.1",
        "messages": [
            {
                "role": "user",
                # Replace this text with your own prompt.
                "content": "Hi! What do you think about mankind?",
            },
        ],
    },
)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4.1",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'
data: {"id":"msg_01CFq3WFrUdc39UqBrAohmVG","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null}}],"created":1770995678,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995678,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"I find humanity","role":"assistant","refusal":null}}],"created":1770995678,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" fascinating in","role":"assistant","refusal":null}}],"created":1770995678,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" its","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complexity.","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" You're a","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" species","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" capable","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of both","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" remarkable","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" creativity and troubl","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ing destruction","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", often","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" simultaneously","role":"assistant","refusal":null}}],"created":1770995679,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". What","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" strikes me most is the","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" human","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" capacity for growth","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" - the","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" way","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" individuals","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" an","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d societies","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" can recognize","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" their fl","role":"assistant","refusal":null}}],"created":1770995680,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"aws and work to overcome","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" them, even","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" if","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" imperfectly.\n\nThere","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'s something deeply","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" moving","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about how","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" humans create meaning through","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" art, relationships","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", and the","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" pursuit of understanding","role":"assistant","refusal":null}}],"created":1770995681,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":",","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" despite","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" knowing","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" your","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" own mortality. The diversity","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of human cultures","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and perspectives","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" is","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" extraordinary, though","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" I","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" recognize","role":"assistant","refusal":null}}],"created":1770995682,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" this","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" also","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" leads","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to conflict.","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nI'm curious what","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" prompte","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d your","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" question - are you reflecting","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" on humanity","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" from","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" a particular angle","role":"assistant","refusal":null}}],"created":1770995683,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", or just wondering","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" how","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" an","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" AI sees","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" you","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" all","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"?","role":"assistant","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null},"finish_reason":"stop"}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":{"prompt_tokens":16,"completion_tokens":138,"total_tokens":154}}
data: {"id":"","choices":[{"index":0,"finish_reason":"stop"}],"created":1770995684,"model":"claude-opus-4-1-20250805","object":"chat.completion.chunk","usage":null}
completion_tokens
import requests
import json  # used to pretty-print the parsed response with indentation

# Request headers: bearer-token authorization plus a JSON content type.
request_headers = {
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    "Content-Type": "application/json",
}

# Request body: the target model and a single user message.
request_body = {
    "model": "anthropic/claude-haiku-4.5",
    "messages": [
        {
            "role": "user",
            "content": "Hello",  # insert your prompt here, instead of Hello
        },
    ],
}

# POST the body to the chat-completions endpoint; `json=` serializes the dict.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers=request_headers,
    json=request_body,
)

# Decode the JSON response body into `data`.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
try {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// Insert your AIML API Key instead of YOUR_AIMLAPI_KEY
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-haiku-4.5',
messages:[
{
role:'user',
// Insert your question for the model here, instead of Hello:
content: 'Hello'
}
]
}),
});
if (!response.ok) {
throw new Error(`HTTP error! Status ${response.status}`);
}
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
} catch (error) {
console.error('Error', error);
}
}
main();{
"id": "msg_01HbdLU9f78VAHxuYZ7Qp9Y1",
"object": "chat.completion",
"model": "claude-haiku-4-5-20251001",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "",
"content": "Hello! 👋 How can I help you today?",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1760650965,
"usage": {
"prompt_tokens": 8,
"completion_tokens": 16,
"total_tokens": 24
}
}
import requests
import json  # used to pretty-print the parsed response with indentation

# Request headers: bearer-token authorization plus a JSON content type.
request_headers = {
    # Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
    "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    "Content-Type": "application/json",
}

# Request body: the target model and a single user message.
request_body = {
    "model": "anthropic/claude-haiku-4.5",
    "messages": [
        {
            "role": "user",
            "content": "Hi! What do you think about mankind?",  # insert your prompt
        },
    ],
}

# POST the body to the chat-completions endpoint; `json=` serializes the dict.
response = requests.post(
    "https://api.aimlapi.com/v1/chat/completions",
    headers=request_headers,
    json=request_body,
)

# Decode the JSON response body into `data`.
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-haiku-4.5",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'
data: {"id":"msg_019GuhDB2ckKZfFmFdNR5Q1H","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null}}],"created":1770995463,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995463,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"I find humanity","role":"assistant","refusal":null}}],"created":1770995463,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" genu","role":"assistant","refusal":null}}],"created":1770995463,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"inely interesting","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" think","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about.","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" You","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'re a","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" species","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" full","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of contradictions—","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"capable","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of both remarkable","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" kin","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"dness and cr","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"uelty, creating","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" beautiful","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" art while","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" causing","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" real","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" harm, building","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" communities","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" while isolating your","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"selves.","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nA few","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" things","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" stan","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d out to me:","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**The creativity","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"** is","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" striking","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"—the","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" drive","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to make meaning","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" through","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" stories","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", music","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", science","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", and invention","role":"assistant","refusal":null}}],"created":1770995464,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" seems","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" almost fundamental","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to human nature.","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**The moral","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" weight","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" you","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" carry** is notable","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" too","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":".","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Humans","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" seem uniqu","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ely b","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"urdened by questions about","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" how","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" to","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" live well, what's","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" fair","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", what","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" you","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" owe each","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" other.","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**The scale","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" problems** you face is sob","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ering—you","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'ve built","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" systems","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" so","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complex that even","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" people","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" running","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" them often don't fully understand the","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" consequences.","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" An","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d yet people","role":"assistant","refusal":null}}],"created":1770995465,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" keep","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" trying to","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" ","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"do better.\n\nI'm genu","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"inely uncertain","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" some","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" things though","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". I","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" don't know if I","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'m roman","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ticizing humanity or missing","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" crucial","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" things","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about the","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" human experience","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". I","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" can't fully","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" gra","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"sp what it's like to be embo","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"died, mor","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"tal, or to feel that weight","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" of time","role":"assistant","refusal":null}}],"created":1770995466,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" passing.","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nWhat prompte","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d the","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" question? Are you in","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" a","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" particular","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" mood about","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" humanity—","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"hop","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"eful, frustrate","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"d, curious?","role":"assistant","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null},"finish_reason":"stop"}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":{"prompt_tokens":16,"completion_tokens":248,"total_tokens":264}}
data: {"id":"","choices":[{"index":0,"finish_reason":"stop"}],"created":1770995467,"model":"claude-haiku-4-5-20251001","object":"chat.completion.chunk","usage":null}
model messages completion_tokens
import requests
import json  # used to pretty-print the decoded response with indentation

# Chat completions endpoint the request is sent to.
API_URL = "https://api.aimlapi.com/v1/chat/completions"

# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
request_headers = {
    "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    "Content-Type": "application/json",
}

# Model selection plus a single user message.
request_body = {
    "model": "anthropic/claude-opus-4-6",
    "messages": [
        {
            "role": "user",
            "content": "Hi! What do you think about mankind?",  # insert your prompt
        }
    ],
}

# POST the payload as JSON, then decode the JSON reply into a Python object.
response = requests.post(API_URL, headers=request_headers, json=request_body)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))async function main() {
const response = await fetch('https://api.aimlapi.com/v1/chat/completions', {
method: 'POST',
headers: {
// insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>
'Authorization': 'Bearer <YOUR_AIMLAPI_KEY>',
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: 'anthropic/claude-opus-4-6',
messages:[
{
role:'user',
content: 'Hi! What do you think about mankind?' // insert your prompt here
}
],
}),
});
const data = await response.json();
console.log(JSON.stringify(data, null, 2));
}
main();{
"id": "msg_018e8mCDfQGYKTGHTdUKNmuU",
"object": "chat.completion",
"model": "claude-opus-4-6",
"choices": [
{
"index": 0,
"message": {
"reasoning_content": "",
"content": "Hi! That's a big question. Here are some honest thoughts:\n\n**What I find remarkable**\n- Human creativity and problem-solving ability\n- The capacity for empathy, cooperation, and self-sacrifice\n- Building cumulative knowledge across generations\n- Art, music, science - the drive to understand and express\n\n**What's more complicated**\n- Humans have a real capacity for both great kindness and great cruelty\n- There's often a gap between what people value in principle and how they actually behave\n- Progress on big problems (poverty, conflict, environment) is real but uneven and slow\n\n**My honest position**\nI'd be cautious about sweeping judgments in either direction. \"Humanity is wonderful\" and \"humanity is terrible\" are both oversimplifications. People are complex, shaped by circumstances, and capable of change.\n\nI should also be transparent: I'm an AI, so my \"perspective\" has limits. I don't experience human life. I can observe patterns in what humans have written and done, but I'd weight your lived experience of humanity more heavily than my outside view.\n\nWhat prompted the question? I'm curious whether you're feeling more optimistic or pessimistic about it.",
"role": "assistant"
},
"finish_reason": "end_turn",
"logprobs": null
}
],
"created": 1770635443,
"usage": {
"prompt_tokens": 16,
"completion_tokens": 264,
"total_tokens": 280
},
"meta": {
"usage": {
"credits_used": 17368
}
}
}
import requests
import json  # used to pretty-print the decoded response with indentation

# Chat completions endpoint the request is sent to.
API_URL = "https://api.aimlapi.com/v1/chat/completions"

# Insert your AIML API Key instead of <YOUR_AIMLAPI_KEY>:
request_headers = {
    "Authorization": "Bearer <YOUR_AIMLAPI_KEY>",
    "Content-Type": "application/json",
}

# Model selection plus a single user message.
request_body = {
    "model": "anthropic/claude-opus-4-6",
    "messages": [
        {
            "role": "user",
            "content": "Hi! What do you think about mankind?",  # insert your prompt
        }
    ],
}

# POST the payload as JSON, then decode the JSON reply into a Python object.
response = requests.post(API_URL, headers=request_headers, json=request_body)
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4-6",
"messages": [
{
"role": "user",
"content": "Hi! What do you think about mankind?"
}
],
"stream": true
}'
data: {"id":"msg_018vTp5RY3pv9qS1euXt8AWb","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null}}],"created":1770989120,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770989120,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"Hi","role":"assistant","refusal":null}}],"created":1770989120,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"! That","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'s a","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" big","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" question.","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Here","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" are some honest","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" thoughts:","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**What I find","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" remarkable","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"**","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n-","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Human","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" creativity and problem","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"-solving are","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" genuinely impressive\n- The capacity","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" for empathy,","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" cooperation","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", and building","role":"assistant","refusal":null}}],"created":1770989121,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complex","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" societies","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n-","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Persistent","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" curiosity -","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" science","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", art, philosophy all","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" reflect","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" a","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" drive to understand and create","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**What seems","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" challenging","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"**\n- Humans often","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" struggle with long","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"-term thinking","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" vs","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":". short","role":"assistant","refusal":null}}],"created":1770989122,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"-term impul","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ses\n- Trib","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"alism and conflict","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" seem","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" persistent","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":",","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" though","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" not","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" inevitable\n- There","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"'s a gap","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" between what people","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" know","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" they","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" *","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"should* do and what they actually do","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\n**My","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" honest","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" c","role":"assistant","refusal":null}}],"created":1770989123,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"aveats**\n- I should","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" be straight","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"forward:","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" I'm an","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" AI, so I don't experience","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" humanity","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the way you do.","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" My","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" perspective","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" is shaped","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" by text","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":", not lived experience.","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n- I'd","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" be skept","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ical of any AI","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" that","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" gives","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" either","role":"assistant","refusal":null}}],"created":1770989124,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" a","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" purely","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" flat","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"tering or purely cyn","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ical answer to","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" this question.","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Reality","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" seems","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" more mixed","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":".","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"\n\nI","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" think","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" humans","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" are neither","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" hero","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ic species","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" some","role":"assistant","refusal":null}}],"created":1770989125,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" narrat","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"ives suggest nor the do","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"omed one","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" others claim","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":".","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" Mostly","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" people","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" are trying to navigate","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" complicated","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" lives with","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" imp","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"erfect information","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" and","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" mixed","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" motivations.\n\nWhat prompted","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" the","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" question? I'm curious what angle","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" you're thinking","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":" about.","role":"assistant","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
data: {"id":"","choices":[{"index":0,"delta":{"content":"","role":"assistant","refusal":null},"finish_reason":"stop"}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":{"prompt_tokens":16,"completion_tokens":258,"total_tokens":274}}
data: {"id":"","choices":[{"index":0,"finish_reason":"stop"}],"created":1770989126,"model":"claude-opus-4-6","object":"chat.completion.chunk","usage":null}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: up to 512 MB and up to 2 million tokens.
- Maximum number of files: up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemma-3n-e4b-it
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
deepseek/deepseek-chat
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
deepseek/deepseek-thinking-v3.2-exp
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The type of the content part.
Base64 encoded audio data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba-cloud/qwen3-omni-30b-a3b-captioner
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-chat",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-chat",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-thinking-v3.2-exp",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-thinking-v3.2-exp",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba-cloud/qwen3-omni-30b-a3b-captioner",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba-cloud/qwen3-omni-30b-a3b-captioner",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-3n-e4b-it",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemma-3n-e4b-it",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-5-0-thinking-latest
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: bytedance/dola-seed-2-0-pro
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: bytedance/dola-seed-2-0-code
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba-cloud/qwen3-next-80b-a3b-instruct
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
meta-llama/llama-3.3-70b-versatile
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba-cloud/qwen3-next-80b-a3b-instruct",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba-cloud/qwen3-next-80b-a3b-instruct",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "meta-llama/llama-3.3-70b-versatile",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "meta-llama/llama-3.3-70b-versatile",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-5-0-thinking-latest",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-5-0-thinking-latest",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "bytedance/dola-seed-2-0-pro",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "bytedance/dola-seed-2-0-pro",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "bytedance/dola-seed-2-0-code",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "bytedance/dola-seed-2-0-code",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
deepseek/deepseek-non-thinking-v3.2-exp
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
Base64-encoded local video file.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, and shimmer.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Enable provider compatibility normalization for tool function JSON schemas.
Whether to enable parallel function calling during tool use.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: google/gemini-3-1-flash-lite
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: anthracite-org/magnum-v4-72b
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: google/gemma-4-31b-it
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemma-3-27b-it
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-2.5-flash
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
gryphe/mythomax-l2-13b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3-235b-a22b-thinking-2507
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba-cloud/qwen3-next-80b-a3b-thinking
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-2.5-flash",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-2.5-flash",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "gryphe/mythomax-l2-13b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "gryphe/mythomax-l2-13b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-235b-a22b-thinking-2507",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-235b-a22b-thinking-2507",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba-cloud/qwen3-next-80b-a3b-thinking",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba-cloud/qwen3-next-80b-a3b-thinking",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-non-thinking-v3.2-exp",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-non-thinking-v3.2-exp",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-3-1-flash-lite",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-3-1-flash-lite",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthracite-org/magnum-v4-72b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthracite-org/magnum-v4-72b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-4-31b-it",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemma-4-31b-it",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-3-27b-it",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemma-3-27b-it",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: qwen-plus
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: bytedance/dola-seed-2-0-mini
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: google/gemma-3-4b-it
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Specifies whether to use the thinking mode.
Default: false
The maximum reasoning length, effective only when enable_thinking is set to true.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3-32b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
Reasoning effort setting
Max tokens of reasoning content. Cannot be used simultaneously with effort.
Whether to exclude reasoning from the response
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
Alternate top sampling parameter.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
High level guidance for the amount of context window space to use for the search. One of low, medium, or high. medium is the default.
Free text input for the city of the user, e.g. San Francisco.
The two-letter ISO country code of the user, e.g. US.
^[A-Z]{2}$
Free text input for the region of the user, e.g. California.
The IANA timezone of the user, e.g. America/Los_Angeles.
The type of location approximation. Always approximate.
Controls the search mode used for the request. When set to 'academic', results will prioritize scholarly sources like peer-reviewed papers and academic journals.
academic
Possible values: academic
A list of domains to limit search results to. Currently limited to 10 domains for Allowlisting and Denylisting. For Denylisting, add a - at the beginning of the domain string.
Determines whether search results should include images.
false
Determines whether related questions should be returned.
false
Filters search results based on time (e.g., 'week', 'day').
Filters search results to only include content published after this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content published before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated after this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
deepseek/deepseek-v4-pro
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-opus-4
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-v4-pro",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-v4-pro",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-opus-4",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "qwen-plus",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "qwen-plus",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "bytedance/dola-seed-2-0-mini",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "bytedance/dola-seed-2-0-mini",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemma-3-4b-it",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemma-3-4b-it",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-32b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-32b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-x1-turbo-32k
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3.6-27b
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: google/gemini-3-1-flash-lite-preview
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-x1-1-preview
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-sonnet-4.6
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-3-1-flash-lite-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-3-1-flash-lite-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-x1-1-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-x1-1-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-sonnet-4.6",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-sonnet-4.6",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-x1-turbo-32k",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-x1-turbo-32k",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.6-27b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.6-27b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Qwen/Qwen2.5-7B-Instruct-Turbo
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-opus-4-7
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-21b-a3b-thinking
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
A list of messages comprising the conversation so far. Depending on the model you use, different message types (modalities) are supported, like text, documents (txt, pdf), images, and audio.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the content part.
The text content.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the user.
The contents of the user message.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The text content.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
Base64-encoded local video file.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
text
Possible values:
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Definitions of tools that the model may use. If you include tools in your API request, the model may return tool_use content blocks that represent the model's use of those tools. You can then run those tools using the tool input generated by the model and then optionally return results back to the model using tool_result content blocks. Each tool definition includes: name: Name of the tool. description: Optional, but strongly-recommended description of the tool. input_schema: JSON schema for the tool input shape that the model will produce in tool_use output content blocks.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
bash
Possible values:
code_execution
Possible values:
code_execution
Possible values:
code_execution
Possible values:
memory
Possible values:
str_replace_editor
Possible values:
str_replace_based_edit_tool
Possible values:
str_replace_based_edit_tool
Possible values:
web_search
Possible values:
web_search
Possible values:
web_fetch
Possible values:
web_fetch
Possible values:
web_fetch
Possible values:
tool_search_tool_bm25
Possible values:
tool_search_tool_regex
Possible values:
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Configuration for enabling Claude's extended thinking. When enabled, responses include thinking content blocks showing Claude's thinking process before the final answer. Requires a minimum budget of 1,024 tokens and counts towards your max_tokens limit.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
summarized
Possible values:
summarized
Possible values:
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
64000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A list of messages comprising the conversation so far. Depending on the model you use, different message types (modalities) are supported, like text, documents (txt, pdf), images, and audio.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the content part.
The text content.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the user.
The contents of the user message.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The text content.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
Base64-encoded local video file.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
text
Possible values:
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Definitions of tools that the model may use. If you include tools in your API request, the model may return tool_use content blocks that represent the model's use of those tools. You can then run those tools using the tool input generated by the model and then optionally return results back to the model using tool_result content blocks. Each tool definition includes: name: Name of the tool. description: Optional, but strongly-recommended description of the tool. input_schema: JSON schema for the tool input shape that the model will produce in tool_use output content blocks.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
bash
Possible values:
code_execution
Possible values:
code_execution
Possible values:
code_execution
Possible values:
memory
Possible values:
str_replace_editor
Possible values:
str_replace_based_edit_tool
Possible values:
str_replace_based_edit_tool
Possible values:
web_search
Possible values:
web_search
Possible values:
web_fetch
Possible values:
web_fetch
Possible values:
web_fetch
Possible values:
tool_search_tool_bm25
Possible values:
tool_search_tool_regex
Possible values:
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Configuration for enabling Claude's extended thinking. When enabled, responses include thinking content blocks showing Claude's thinking process before the final answer. Requires a minimum budget of 1,024 tokens and counts towards your max_tokens limit.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
summarizedPossible values: summarizedPossible values: The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
128000
Model used to generate the response.
claude-sonnet-4-20250514
Unique identifier of the generated message.
msg_01SUmNmSRFZsoa6h96MxJEHH
Object type.
message
Role of the message author.
assistant
Content block type.
text
Generated text content.
I'm functioning well, thank you for asking! I'm ready to help you with any questions or tasks you might have. How can I assist you today?
Reason why generation stopped.
end_turn
Custom stop sequence that caused generation to stop.
Additional details about why generation stopped.
Number of input tokens used.
27
Number of tokens used for cache creation.
0
Number of tokens retrieved from cache.
0
Number of tokens cached for 5 minutes.
0
Number of tokens cached for 1 hour.
0
Number of output tokens generated.
35
Service tier used for inference.
standard
Geographic inference location.
not_available
Number of credits consumed.
1576
Amount spent in USD.
0.000788
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-21b-a3b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values:
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3.6-35b-a3b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
Base64-encoded local video file.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. Limits: maximum size per file — up to 512 MB and up to 2 million tokens; maximum number of files — up to 20 files can be attached to a single GPT application or Assistant (this limit applies throughout the application's lifetime); maximum total file storage per user — 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, and shimmer.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Enable provider compatibility normalization for tool function JSON schemas.
Whether to enable parallel function calling during tool use.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: google/gemini-3-5-flash
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-21b-a3b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-21b-a3b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.6-35b-a3b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.6-35b-a3b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-3-5-flash",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-3-5-flash",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4-7",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-opus-4-7",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-21b-a3b-thinking",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-21b-a3b-thinking",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/messages' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "claude-sonnet-4-20250514",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"model": "claude-sonnet-4-20250514",
"id": "msg_01SUmNmSRFZsoa6h96MxJEHH",
"type": "message",
"role": "assistant",
"content": [
{
"type": "text",
"text": "I'm functioning well, thank you for asking! I'm ready to help you with any questions or tasks you might have. How can I assist you today?"
}
],
"stop_reason": "end_turn",
"stop_sequence": "text",
"stop_details": null,
"usage": {
"input_tokens": 27,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0,
"cache_creation": {
"ephemeral_5m_input_tokens": 0,
"ephemeral_1h_input_tokens": 0
},
"output_tokens": 35,
"service_tier": "standard",
"inference_geo": "not_available"
},
"meta": {
"usage": {
"credits_used": 1576,
"usd_spent": 0.000788
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-r1
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
Reasoning effort setting
Max tokens of reasoning content. Cannot be used simultaneously with effort.
Whether to exclude reasoning from the response
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
Alternate top sampling parameter.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
High level guidance for the amount of context window space to use for the search. One of low, medium, or high. medium is the default.
Free text input for the city of the user, e.g. San Francisco.
The two-letter ISO country code of the user, e.g. US.
Pattern: ^[A-Z]{2}$
Free text input for the region of the user, e.g. California.
The IANA timezone of the user, e.g. America/Los_Angeles.
The type of location approximation. Always approximate.
Controls the search mode used for the request. When set to 'academic', results will prioritize scholarly sources like peer-reviewed papers and academic journals.
Example: academic
Possible values include: academic
A list of domains to limit search results to. Currently limited to 10 domains for Allowlisting and Denylisting. For Denylisting, add a - at the beginning of the domain string.
Determines whether search results should include images.
Default: false
Determines whether related questions should be returned.
Default: false
Filters search results based on time (e.g., 'week', 'day').
Filters search results to only include content published after this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content published before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated after this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-v4-flash
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-r1",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-r1",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-v4-flash",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-v4-flash",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-5-0-thinking-preview
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-4.5-vl-28b-a3b
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: anthropic/claude-opus-4-5
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: cohere/command-a
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-4-5-8k-preview
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "cohere/command-a",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "cohere/command-a",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4-5-8k-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4-5-8k-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-5-0-thinking-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-5-0-thinking-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-vl-28b-a3b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-vl-28b-a3b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4-5",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-opus-4-5",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Enable provider compatibility normalization for tool function JSON schemas.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
Reasoning effort setting
Max tokens of reasoning content. Cannot be used simultaneously with effort.
Whether to exclude reasoning from the response
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
Alternate top sampling parameter.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
High level guidance for the amount of context window space to use for the search. One of low, medium, or high. medium is the default.
Free text input for the city of the user, e.g. San Francisco.
The two-letter ISO country code of the user, e.g. US.
Pattern: ^[A-Z]{2}$
Free text input for the region of the user, e.g. California.
The IANA timezone of the user, e.g. America/Los_Angeles.
The type of location approximation. Always approximate.
Controls the search mode used for the request. When set to 'academic', results will prioritize scholarly sources like peer-reviewed papers and academic journals.
Possible values include: academic
A list of domains to limit search results to. Currently limited to 10 domains for Allowlisting and Denylisting. For Denylisting, add a - at the beginning of the domain string.
Determines whether search results should include images.
Default: false
Determines whether related questions should be returned.
Default: false
Filters search results based on time (e.g., 'week', 'day').
Filters search results to only include content published after this date. Format should be %m/%d/%Y (e.g. 3/1/2025)
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content published before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated after this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
Filters search results to only include content last updated before this date. Format should be %m/%d/%Y (e.g. 3/1/2025).
Pattern: ^(0?[1-9]|1[0-2])\/(0?[1-9]|[12]\d|3[01])\/\d{4}$
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: gpt-4o-2024-08-06
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
POST /v1/chat/completions HTTP/1.1
Host: api.aimlapi.com
Content-Type: application/json
Accept: */*
Content-Length: 1232
{
"model": "alibaba/qwen3.5-flash",
"messages": [
{
"role": "user",
"content": "text",
"name": "text"
}
],
"max_tokens": 1,
"stream": false,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"description": "text",
"name": "text",
"parameters": {
"ANY_ADDITIONAL_PROPERTY": null
},
"strict": true
}
}
],
"tool_choice": "none",
"normalize_tool_schemas": true,
"parallel_tool_calls": true,
"temperature": 1,
"top_p": 1,
"stop": "text",
"logit_bias": {
"ANY_ADDITIONAL_PROPERTY": 1
},
"logprobs": true,
"top_logprobs": 1,
"frequency_penalty": 1,
"prediction": {
"type": "content",
"content": "text"
},
"seed": 1,
"presence_penalty": 1,
"reasoning_effort": "low",
"reasoning": {
"effort": "low",
"max_tokens": 1,
"exclude": true
},
"response_format": {
"type": "text"
},
"echo": true,
"min_p": 1,
"top_k": 1,
"top_a": 1,
"repetition_penalty": 1,
"web_search_options": {
"search_context_size": "low",
"user_location": {
"approximate": {
"city": "text",
"country": "text",
"region": "text",
"timezone": "text"
},
"type": "approximate"
}
},
"search_mode": "academic",
"search_domain_filter": [
"text"
],
"return_images": false,
"return_related_questions": false,
"search_recency_filter": "day",
"search_after_date_filter": "text",
"search_before_date_filter": "text",
"last_updated_after_filter": "text",
"last_updated_before_filter": "text"
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: up to 512 MB and up to 2 million tokens.
- Maximum number of files: up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-reasoner-v3.1
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: up to 512 MB and up to 2 million tokens.
- Maximum number of files: up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-non-reasoner-v3.1-terminus
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
Default: 32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-opus-4-6
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3-coder-480b-a35b-instruct
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-2.5-flash-lite-preview
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-sonnet-4.5
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-coder-480b-a35b-instruct",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-coder-480b-a35b-instruct",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-2.5-flash-lite-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-2.5-flash-lite-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-sonnet-4.5",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-sonnet-4.5",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "gpt-4o-2024-08-06",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-reasoner-v3.1",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-reasoner-v3.1",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-non-reasoner-v3.1-terminus",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-non-reasoner-v3.1-terminus",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4-6",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-opus-4-6",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-0.3b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
bytedance/dola-seed-2-0-lite
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
The type of the content part.
The text content.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
The input for the custom tool call generated by the model.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
The type of the tool. Currently, only function is supported.
The name of the custom tool, used to identify it in tool calls.
Optional description of the custom tool, used to provide more context.
The input format for the custom tool. Default is unconstrained text.
The grammar definition.
The syntax of the grammar definition.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
Constrains the tools available to the model to a pre-defined set.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The type of the tool. Currently, only function is supported.
The name of the custom tool to call.
Enable provider compatibility normalization for tool function JSON schemas.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
Reasoning effort setting
Max tokens of reasoning content. Cannot be used simultaneously with effort.
Whether to exclude reasoning from the response
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3.7-max
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, and shimmer.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Specifies whether to use the thinking mode.
Default: false
The maximum reasoning length, effective only when enable_thinking is set to true.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3.5-omni-flash
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: baidu/ernie-4-5-turbo-128k
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. May contain only the characters a-z, A-Z, 0-9, underscores, and dashes, with a maximum length of 64 characters.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: bytedance/seed-1-8
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4-5-turbo-128k",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4-5-turbo-128k",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "bytedance/seed-1-8",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "bytedance/seed-1-8",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-0.3b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-0.3b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "bytedance/dola-seed-2-0-lite",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "bytedance/dola-seed-2-0-lite",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.7-max",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.7-max",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.5-omni-flash",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.5-omni-flash",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3-vl-32b-instruct
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: Up to 512 MB and up to 2 million tokens.
- Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Alternate top sampling parameter.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-chat-v3.1
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
qwen-max
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-3-1-pro-preview
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
meta-llama/Llama-3.3-70B-Instruct-Turbo
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "qwen-max",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "qwen-max",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-3-1-pro-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-3-1-pro-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-vl-32b-instruct",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-vl-32b-instruct",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-chat-v3.1",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-chat-v3.1",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
falseThe type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMlThe object type.
chat.completionPossible values: The Unix timestamp (in seconds) of when the chat completion was created.
1762343744The index of the choice in the list of choices.
0The role of the author of this message.
assistantThe contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3-max-instructNumber of tokens in the prompt.
137Number of tokens in the generated completion.
914Total number of tokens used in the request (prompt + completion).
1051When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
falseThe type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMlThe object type.
chat.completionPossible values: The Unix timestamp (in seconds) of when the chat completion was created.
1762343744The index of the choice in the list of choices.
0The role of the author of this message.
assistantThe contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3.5-plus-20260218Number of tokens in the prompt.
137Number of tokens in the generated completion.
914Total number of tokens used in the request (prompt + completion).
1051When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
falseThe type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: qwen-turbo
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06

curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "qwen-turbo",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'

{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "qwen-turbo",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}

curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-max-instruct",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'

{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-max-instruct",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}

curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.5-plus-20260218",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'

{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.5-plus-20260218",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}

The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: alibaba/qwen3-max-preview
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06

The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported. - Maximum size per file: Up to 512 MB and up to 2 million tokens. - Maximum number of files: Up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime. - Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The contents of the developer message.
The type of the content part.
The text content.
The role of the author of the message — in this case, the developer.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
If True, the response will contain the prompt. Can be used with logprobs to return prompt logprobs.
A number between 0.001 and 0.999 that can be used as an alternative to top_p and top_k.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
A unique identifier for the chat completion.
Example: chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
Example: chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
Example: 1762343744
The index of the choice in the list of choices.
Example: 0
The role of the author of this message.
Example: assistant
The contents of the message.
Example: Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
Example: deepseek/deepseek-reasoner-v3.1-terminus
Number of tokens in the prompt.
Example: 137
Number of tokens in the generated completion.
Example: 914
Total number of tokens used in the request (prompt + completion).
Example: 1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
Example: 120000
The total amount of money spent by the user in USD.
Example: 0.06

curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "deepseek/deepseek-reasoner-v3.1-terminus",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'

{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "deepseek/deepseek-reasoner-v3.1-terminus",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}

curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-max-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'

{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-max-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}

The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
Default: false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-2.0-flash
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
The file data, encoded in base64 and passed to the model as a string. Only PDF format is supported.
- Maximum size per file: up to 512 MB and up to 2 million tokens.
- Maximum number of files: up to 20 files can be attached to a single GPT application or Assistant. This limit applies throughout the application's lifetime.
- Maximum total file storage per user: 10 GB.
The file name specified by the user. This name can be used to reference the file when interacting with the model, especially if multiple files are uploaded.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
baidu/ernie-4.5-vl-424b-a47b
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-sonnet-4
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio output tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-2.0-flash",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-2.0-flash",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "baidu/ernie-4.5-vl-424b-a47b",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "baidu/ernie-4.5-vl-424b-a47b",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-sonnet-4",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-sonnet-4",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3-vl-32b-thinking
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-opus-4.1
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3-vl-32b-thinking",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3-vl-32b-thinking",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-opus-4.1",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-opus-4.1",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-2.5-pro
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio input tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
The type of the content part.
The text content.
The type of the image.
The media type of the image.
The base64 encoded image data.
Custom text sequences that will cause the model to stop generating.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
A system prompt is a way of providing context and instructions to Claude, such as specifying a particular goal or role.
Name of the tool.
Description of what this tool does. Tool descriptions should be as detailed as possible. The more information that the model has about what the tool is and how to use it, the better it will perform. You can use natural language descriptions to reinforce important aspects of the tool input JSON schema.
Determines how many tokens Claude can use for its internal reasoning process. Larger budgets can enable more thorough analysis for complex problems, improving response quality. Must be ≥1024 and less than max_tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
32000
Amount of randomness injected into the response. Defaults to 1.0. Ranges from 0.0 to 1.0. Use temperature closer to 0.0 for analytical / multiple choice, and closer to 1.0 for creative and generative tasks. Note that even with temperature of 0.0, the results will not be fully deterministic.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Only sample from the top K options for each subsequent token. Used to remove "long tail" low probability responses. Recommended for advanced use cases only. You usually only need to use temperature.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
anthropic/claude-haiku-4.5
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "anthropic/claude-haiku-4.5",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "anthropic/claude-haiku-4.5",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-2.5-pro",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-2.5-pro",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
The type of the content part.
Either a URL of the audio or the base64 encoded audio data.
The format of the encoded audio data. Currently supports "wav" and "mp3".
The type of the content part.
Either a URL of the video or the base64 encoded video data.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
Unique identifier for a previous audio response from the model.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, and shimmer.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Whether to return log probabilities of the output tokens or not. If True, returns the log probabilities of each output token returned in the content of message.
An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to True if this parameter is used.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
Specifies whether to use the thinking mode.
false
The maximum reasoning length, effective only when enable_thinking is set to true.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
alibaba/qwen3.5-omni-plus
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
The role of the author of the message — in this case, the user.
The contents of the user message.
The type of the content part.
The text content.
Either a URL of the image or the base64 encoded image data.
Specifies the detail level of the image. Currently supports JPG/JPEG, PNG, GIF, and WEBP formats.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the system.
The contents of the system message.
The type of the content part.
The text content.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the tool.
The contents of the tool message.
Tool call that this message is responding to.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The role of the author of the message — in this case, the Assistant.
The contents of the Assistant message. Required unless tool_calls or function_call is specified.
The contents of the Assistant message.
The type of the content part.
The text content.
The refusal message generated by the model.
The type of the content part.
An optional name for the participant. Provides the model information to differentiate between participants of the same role.
The ID of the tool call.
The type of the tool. Currently, only function is supported.
The name of the function to call.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The refusal message by the Assistant.
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
The maximum number of tokens that can be generated in the chat completion. This value can be used to control costs for text generated via API.
If set to True, the model response data will be streamed to the client as it is generated using server-sent events.
false
How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
What sampling temperature to use. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.
Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
The type of the predicted content you want to provide.
The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
The type of the content part.
The text content.
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.
An object specifying the format that the model must output.
The type of response format being defined. Always text.
The type of response format being defined. Always json_object.
The type of response format being defined. Always json_schema.
The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
Whether to enable strict schema adherence when generating the output. If set to True, the model will always follow the exact schema defined in the schema field. Only a subset of JSON Schema is supported when strict is True.
A description of what the response format is for, used by the model to determine how to respond in the format.
The type of the tool. Currently, only function is supported.
A description of what the function does, used by the model to choose when and how to call the function.
The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
The parameters the function accepts, described as a JSON Schema object.
Whether to enable strict schema adherence when generating the function call. If set to True, the model will follow the exact schema defined in the parameters field. Only a subset of JSON Schema is supported when strict is True.
Controls which (if any) tool is called by the model. none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools. Specifying a particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool. none is the default when no tools are present. auto is the default if tools are present.
none means the model will not call any tool and instead generates a message. auto means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools.
The type of the tool. Currently, only function is supported.
The name of the function to call.
Whether to enable parallel function calling during tool use.
A unique identifier for the chat completion.
chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl
The object type.
chat.completion
Possible values: chat.completion
The Unix timestamp (in seconds) of when the chat completion was created.
1762343744
The index of the choice in the list of choices.
0
The role of the author of this message.
assistant
The contents of the message.
Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?
The refusal message generated by the model.
The type of the URL citation. Always url_citation.
The index of the last character of the URL citation in the message.
The index of the first character of the URL citation in the message.
The title of the web resource.
The URL of the web resource.
Unique identifier for this audio response.
Base64 encoded audio bytes generated by the model, in the format specified in the request.
Transcript of the audio generated by the model.
The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
The ID of the tool call.
The type of the tool.
The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
The name of the function to call.
The ID of the tool call.
The type of the tool.
The input for the custom tool call generated by the model.
The name of the custom tool to call.
The reason the model stopped generating tokens. This will be stop if the model hit a natural stop point or a provided stop sequence, length if the maximum number of tokens specified in the request was reached, content_filter if content was omitted due to a flag from our content filters, or tool_calls if the model called a tool.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be null if there is no bytes representation for the token.
The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value -9999.0 is used to signify that the token is very unlikely.
The token.
The model used for the chat completion.
google/gemini-3-flash-preview
Number of tokens in the prompt.
137
Number of tokens in the generated completion.
914
Total number of tokens used in the request (prompt + completion).
1051
When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
Audio tokens generated by the model.
Tokens generated by the model for reasoning.
When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
Audio input tokens present in the prompt.
Cached tokens present in the prompt.
The number of tokens consumed during generation.
120000
The total amount of money spent by the user in USD.
0.06
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "google/gemini-3-flash-preview",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}
curl -L \
--request POST \
--url 'https://api.aimlapi.com/v1/chat/completions' \
--header 'Authorization: Bearer <YOUR_AIMLAPI_KEY>' \
--header 'Content-Type: application/json' \
--data '{
"model": "alibaba/qwen3.5-omni-plus",
"messages": [
{
"role": "user",
"content": "Hello"
}
]
}'
{
"id": "chatcmpl-CQ9FPg3osank0dx0k46Z53LTqtXMl",
"object": "chat.completion",
"created": 1762343744,
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?",
"refusal": null,
"annotations": null,
"audio": null,
"tool_calls": null
},
"finish_reason": "stop",
"logprobs": null
}
],
"model": "alibaba/qwen3.5-omni-plus",
"usage": {
"prompt_tokens": 137,
"completion_tokens": 914,
"total_tokens": 1051,
"completion_tokens_details": null,
"prompt_tokens_details": null
},
"meta": {
"usage": {
"credits_used": 120000,
"usd_spent": 0.06
}
}
}