LiteLLM is an open-source Python library that provides a unified API for interacting with multiple large language model providers. Because every provider is exposed through the same interface, developers can switch between models with minimal code changes, balance cost against performance, and experiment with or deploy different AI providers without rewriting their integration.
If you use this library, you can also call AI/ML API models through it. Below are the most common use cases.
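If LiteLLM is not installed yet, it is available from PyPI:

```bash
pip install litellm
```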
Completion

```python
import litellm

response = litellm.completion(
    # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
    model="openai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
    # Your AI/ML API key:
    api_key="<YOUR_AIMLAPI_KEY>",
    api_base="https://api.aimlapi.com/v2",
    messages=[
        {
            "role": "user",
            "content": "Hey, how's it going?",
        }
    ],
)
print(response)
```
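The returned object follows the OpenAI chat-completion schema, so the generated text can typically be read from the first choice. A minimal sketch, assuming the `response` object from above:

```python
# The response follows the OpenAI chat-completion schema,
# so the generated text is available on the first choice:
text = response.choices[0].message.content
print(text)
```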
Streaming
```python
import litellm

response = litellm.completion(
    # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
    model="openai/Qwen/Qwen2-72B-Instruct",
    # Your AI/ML API key:
    api_key="<YOUR_AIMLAPI_KEY>",
    api_base="https://api.aimlapi.com/v2",
    messages=[
        {
            "role": "user",
            "content": "Hey, how's it going?",
        }
    ],
    stream=True,
)

for chunk in response:
    print(chunk)
```
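Each streamed chunk also follows the OpenAI delta format: the incremental text sits in `choices[0].delta.content` and may be `None` on control chunks. A minimal sketch of assembling the full reply instead of printing raw chunks, assuming the same streaming `response` as above:

```python
# Accumulate only the text deltas; delta.content is None on some chunks.
full_text = ""
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta is not None:
        full_text += delta
        print(delta, end="", flush=True)
```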
Async Completion
```python
import asyncio

import litellm


async def main():
    response = await litellm.acompletion(
        # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
        model="openai/anthropic/claude-3-5-haiku",
        # Your AI/ML API key:
        api_key="<YOUR_AIMLAPI_KEY>",
        api_base="https://api.aimlapi.com/v2",
        messages=[
            {
                "role": "user",
                "content": "Hey, how's it going?",
            }
        ],
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())
```
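Because `acompletion` is a coroutine, several requests can run concurrently with `asyncio.gather`, which is the main reason to prefer it over the synchronous call. A minimal sketch, reusing the model and credentials from the example above; the `ask` helper and the prompts are illustrative only:

```python
import asyncio

import litellm


async def ask(question: str):
    # Same model and credentials as the example above; only the prompt varies.
    return await litellm.acompletion(
        model="openai/anthropic/claude-3-5-haiku",
        api_key="<YOUR_AIMLAPI_KEY>",
        api_base="https://api.aimlapi.com/v2",
        messages=[{"role": "user", "content": question}],
    )


async def main():
    # Both requests are in flight at the same time.
    responses = await asyncio.gather(
        ask("What is LiteLLM?"),
        ask("What is AI/ML API?"),
    )
    for response in responses:
        print(response.choices[0].message.content)


if __name__ == "__main__":
    asyncio.run(main())
```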
Async Streaming
```python
import asyncio
import traceback

import litellm


async def main():
    try:
        print("test acompletion + streaming")
        response = await litellm.acompletion(
            # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
            model="openai/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
            # Your AI/ML API key:
            api_key="<YOUR_AIMLAPI_KEY>",
            api_base="https://api.aimlapi.com/v2",
            messages=[{"content": "Hey, how's it going?", "role": "user"}],
            stream=True,
        )
        print(f"response: {response}")
        async for chunk in response:
            print(chunk)
    except Exception:
        # Avoid a bare except; log the traceback instead of failing silently.
        print(f"error occurred: {traceback.format_exc()}")


if __name__ == "__main__":
    asyncio.run(main())
```
Async Embedding
```python
import asyncio

import litellm


async def main():
    response = await litellm.aembedding(
        # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
        model="openai/text-embedding-3-small",
        # Your AI/ML API key:
        api_key="<YOUR_AIMLAPI_KEY>",
        api_base="https://api.aimlapi.com/v1",  # 👈 note: this endpoint uses v1, not v2
        input="Your text string",
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())
```
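The embedding response also follows the OpenAI schema: `data` holds one entry per input string, and the vector itself is typically found under `data[0]["embedding"]`. A minimal sketch, assuming the `response` object from above:

```python
# The response follows the OpenAI embedding schema:
# data is a list with one entry per input string.
vector = response.data[0]["embedding"]
print(len(vector))  # dimensionality of the embedding
```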
Async Image Generation
```python
import asyncio

import litellm


async def main():
    response = await litellm.aimage_generation(
        # The model name must include the "openai/" prefix followed by the model ID from AI/ML API:
        model="openai/dall-e-3",
        # Your AI/ML API key:
        api_key="<YOUR_AIMLAPI_KEY>",
        api_base="https://api.aimlapi.com/v1",  # 👈 note: this endpoint uses v1, not v2
        prompt="A cute baby sea otter",
    )
    print(response)


if __name__ == "__main__":
    asyncio.run(main())
```
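The image-generation response mirrors the OpenAI schema, so each generated image in `data` typically exposes its URL. A minimal sketch, assuming the `response` object from above:

```python
# The response mirrors the OpenAI image-generation schema:
# each entry in data describes one generated image.
image_url = response.data[0].url
print(image_url)
```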