Integrate Your Own Model

Guide for using your own LLM or server with the ElevenLabs SDK.

Using Your Own OpenAI Key for LLM

To integrate a custom OpenAI key, create a secret containing your OPENAI_API_KEY:

1. Navigate to the “Secrets” page and select “Add Secret”.
2. Choose “Custom LLM” from the dropdown menu.
3. Enter the URL, your model, and the secret you created.
4. Set “Custom LLM extra body” to true.

Custom LLM Server

To bring your own LLM server, set up an OpenAI-compatible endpoint, specifically one that implements create_chat_completion.

Here’s an example server implementation using FastAPI and OpenAI’s Python SDK:

import json
import os
import fastapi
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI
import uvicorn
import logging
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import List, Optional

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in environment variables")

app = fastapi.FastAPI()
oai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)

class Message(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    messages: List[Message]
    model: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    user_id: Optional[str] = None

@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest) -> StreamingResponse:
    oai_request = request.dict(exclude_none=True)
    if "user_id" in oai_request:
        oai_request["user"] = oai_request.pop("user_id")

    chat_completion_coroutine = await oai_client.chat.completions.create(**oai_request)

    async def event_stream():
        try:
            async for chunk in chat_completion_coroutine:
                # Convert the ChatCompletionChunk to a dictionary before JSON serialization
                chunk_dict = chunk.model_dump()
                yield f"data: {json.dumps(chunk_dict)}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            logging.error("An error occurred: %s", str(e))
            yield f"data: {json.dumps({'error': 'Internal error occurred!'})}\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8013)

Run this code or your own server code.
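
Before connecting the server to ElevenLabs, you can send it a quick local request to confirm that it streams responses. This is a minimal sketch, assuming the server above is running on port 8013 and that the requests package is installed:

# Minimal local smoke test for the server above (assumes it is listening on port 8013).
import requests

payload = {
    "model": "gpt-4o",
    "stream": True,
    "messages": [{"role": "user", "content": "Hello, who are you?"}],
}

with requests.post(
    "http://localhost:8013/v1/chat/completions", json=payload, stream=True
) as response:
    # Print the raw server-sent events as they arrive
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))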

Setting Up a Public URL for Your Server

To make your server accessible, create a public URL using a tunneling tool like ngrok:

$ ngrok http --url=<Your url>.ngrok.app 8013

Configuring the ElevenLabs Custom LLM

Now make the corresponding changes in ElevenLabs.

Point the server URL to your ngrok endpoint, set “Limit token usage” to 5000, and set “Custom LLM extra body” to true.

You can now start interacting with Conversational AI using your own LLM server.

Additional Features

You may pass additional parameters to your custom LLM implementation.

1. Define the Extra Parameters

Create an object containing your custom parameters (a short usage sketch follows the snippet):

from elevenlabs.conversational_ai.conversation import Conversation, ConversationConfig

extra_body_for_convai = {
    "UUID": "123e4567-e89b-12d3-a456-426614174000",
    "parameter-1": "value-1",
    "parameter-2": "value-2",
}

config = ConversationConfig(
    extra_body=extra_body_for_convai,
)
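
To have these parameters sent with each request, pass the config to Conversation when starting your agent. The following is a minimal sketch, assuming the elevenlabs Python SDK’s DefaultAudioInterface and a config keyword on Conversation; AGENT_ID and ELEVENLABS_API_KEY are placeholders for your own values:

# Minimal sketch (assumed SDK usage): start a session that sends extra_body to your custom LLM.
# AGENT_ID and ELEVENLABS_API_KEY are placeholders; `config` comes from the snippet above.
import os
from elevenlabs.client import ElevenLabs
from elevenlabs.conversational_ai.default_audio_interface import DefaultAudioInterface

api_key = os.getenv("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=api_key)

conversation = Conversation(
    client,
    os.getenv("AGENT_ID"),
    requires_auth=bool(api_key),
    audio_interface=DefaultAudioInterface(),
    config=config,
)
conversation.start_session()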
2. Update the LLM Implementation

Modify your custom LLM code to handle the additional parameters:

import json
import os
import fastapi
from fastapi.responses import StreamingResponse
from fastapi import Request
from openai import AsyncOpenAI
import uvicorn
import logging
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import List, Optional

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in environment variables")

app = fastapi.FastAPI()
oai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)

class Message(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    messages: List[Message]
    model: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    user_id: Optional[str] = None
    elevenlabs_extra_body: Optional[dict] = None

@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest) -> StreamingResponse:
    oai_request = request.dict(exclude_none=True)
    print(oai_request)
    if "user_id" in oai_request:
        oai_request["user"] = oai_request.pop("user_id")

    if "elevenlabs_extra_body" in oai_request:
        oai_request.pop("elevenlabs_extra_body")

    chat_completion_coroutine = await oai_client.chat.completions.create(**oai_request)

    async def event_stream():
        try:
            async for chunk in chat_completion_coroutine:
                chunk_dict = chunk.model_dump()
                yield f"data: {json.dumps(chunk_dict)}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            logging.error("An error occurred: %s", str(e))
            yield f"data: {json.dumps({'error': 'Internal error occurred!'})}\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8013)

Example Request

With this setup, your custom LLM will receive requests in the following format:

{
  "messages": [
    {
      "role": "system",
      "content": "\n <Redacted>"
    },
    {
      "role": "assistant",
      "content": "Hey I'm currently unavailable."
    },
    {
      "role": "user",
      "content": "Hey, who are you?"
    }
  ],
  "model": "gpt-4o",
  "temperature": 0.5,
  "max_tokens": 5000,
  "stream": true,
  "elevenlabs_extra_body": {
    "UUID": "123e4567-e89b-12d3-a456-426614174000",
    "parameter-1": "value-1",
    "parameter-2": "value-2"
  }
}
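
If your server needs to act on these values, for example to look up per-conversation context by UUID, read them before dropping the field from the OpenAI request. A hypothetical sketch of the relevant lines inside create_chat_completion:

# Hypothetical sketch: consume elevenlabs_extra_body before dropping it from the OpenAI request.
extra = oai_request.pop("elevenlabs_extra_body", None) or {}
conversation_uuid = extra.get("UUID")
if conversation_uuid:
    # e.g. fetch user- or conversation-specific context keyed by the UUID
    logging.info("Handling request for conversation %s", conversation_uuid)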