
Removed `litellm` from dependencies. (#1300) · stacklok/codegate@dc2ceb0

Commit dc2ceb0

Removed litellm from dependencies. (#1300)
This cleans up all remaining references to `litellm` by removing code that is no longer used. There is still other code that can be refactored and removed, which I'll do in another PR. I also took the chance to ship one major fix for a bug in Ollama handling of single-response requests, as well as some minor cleanups.
1 parent 38cf3c3 commit dc2ceb0
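
Most of the code removed below is glue that re-emitted provider stream chunks through litellm's OpenAI-shaped response types. A minimal sketch of the pattern being deleted (illustrative, not codegate code; it assumes `litellm==1.63.0`, the pin dropped in this commit):

# Illustrative only: mirrors how the removed adapter code rebuilt
# Anthropic stream chunks using litellm's OpenAI-shaped response types.
from litellm import ModelResponse
from litellm.types.utils import Delta, StreamingChoices

chunk = ModelResponse(
    id="anthropic-chat-example",
    model="anthropic-muxed-model",
    object="chat.completion.chunk",
    choices=[
        StreamingChoices(
            finish_reason=None,
            index=0,
            delta=Delta(content="hello", role="assistant"),
            logprobs=None,
        )
    ],
)
print(chunk.model_dump_json(exclude_none=True, exclude_unset=True))

With the dependency gone, the muxing adapter no longer rebuilds chunks at all: the whole formatter hierarchy is deleted below.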

File tree

11 files changed (+84, −1536 lines changed)

poetry.lock

+13-1,076
Some generated files are not rendered by default.

pyproject.toml

-2
@@ -14,7 +14,6 @@ PyYAML = "==6.0.2"
 fastapi = "==0.115.11"
 uvicorn = "==0.34.0"
 structlog = "==25.2.0"
-litellm = "==1.63.0"
 llama_cpp_python = "==0.3.5"
 cryptography = "==44.0.2"
 sqlalchemy = "==2.0.39"
@@ -50,7 +49,6 @@ ruff = "==0.11.0"
 bandit = "==1.8.3"
 build = "==1.2.2.post1"
 wheel = "==0.45.1"
-litellm = "==1.63.0"
 pytest-asyncio = "==0.25.3"
 llama_cpp_python = "==0.3.5"
 scikit-learn = "==1.6.1"
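
Since the project is managed with Poetry (hence poetry.lock), dropping these two pins is typically done by deleting them and re-running `poetry lock`, or with `poetry remove litellm` for each dependency group; regenerating the lockfile without litellm's transitive dependencies is what produces the large −1,076 line delta in poetry.lock above.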

src/codegate/muxing/adapter.py

+15-268
@@ -1,23 +1,10 @@
-import json
-import uuid
-from abc import ABC, abstractmethod
-from typing import Callable, Dict, Union
 from urllib.parse import urljoin
 
 import structlog
-from fastapi.responses import JSONResponse, StreamingResponse
-from litellm import ModelResponse
-from litellm.types.utils import Delta, StreamingChoices
 
 from codegate.config import Config
 from codegate.db import models as db_models
 from codegate.muxing import rulematcher
-from codegate.muxing.ollama_mappers import (
-    openai_chunk_from_ollama_chat,
-    openai_chunk_from_ollama_generate,
-)
-from codegate.types.ollama import StreamingChatCompletion as OllamaStreamingChatCompletion
-from codegate.types.ollama import StreamingGenerateCompletion as OllamaStreamingGenerateCompletion
 
 logger = structlog.get_logger("codegate")
 
@@ -35,260 +22,20 @@ def get_llamacpp_models_folder():
     return override if override else "./codegate_volume/models"
 
 
-class BodyAdapter:
-    """
-    Format the body to the destination provider format.
+def get_provider_formatted_url(model_route: rulematcher.ModelRoute) -> str:
+    """Get the provider formatted URL to use in base_url. Note this value comes from DB"""
+    if model_route.endpoint.provider_type in [
+        db_models.ProviderType.openai,
+        db_models.ProviderType.vllm,
+    ]:
+        return urljoin(model_route.endpoint.endpoint, "/v1")
+    if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
+        return urljoin(model_route.endpoint.endpoint, "/api/v1")
+    if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
+        return get_llamacpp_models_folder()
+    return model_route.endpoint.endpoint
 
-    We expect the body to always be in OpenAI format. We need to configure the client
-    to send and expect OpenAI format. Here we just need to set the destination provider info.
-    """
 
-    def _get_provider_formatted_url(self, model_route: rulematcher.ModelRoute) -> str:
-        """Get the provider formatted URL to use in base_url. Note this value comes from DB"""
-        if model_route.endpoint.provider_type in [
-            db_models.ProviderType.openai,
-            db_models.ProviderType.vllm,
-        ]:
-            return urljoin(model_route.endpoint.endpoint, "/v1")
-        if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
-            return urljoin(model_route.endpoint.endpoint, "/api/v1")
-        if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
-            return get_llamacpp_models_folder()
-        return model_route.endpoint.endpoint
-
-    def get_destination_info(self, model_route: rulematcher.ModelRoute) -> dict:
-        """Set the destination provider info."""
-        return model_route.model.name, self._get_provider_formatted_url(model_route)
-
-
-class OutputFormatter(ABC):
-
-    @property
-    @abstractmethod
-    def provider_format_funcs(self) -> Dict[str, Callable]:
-        """
-        Return the provider specific format functions. All providers format functions should
-        return the chunk in OpenAI format.
-        """
-        pass
-
-    @abstractmethod
-    def format(
-        self, response: Union[StreamingResponse, JSONResponse], dest_prov: db_models.ProviderType
-    ) -> Union[StreamingResponse, JSONResponse]:
-        """Format the response to the client."""
-        pass
-
-
-class StreamChunkFormatter(OutputFormatter):
-    """
-    Format a single chunk from a stream to OpenAI format.
-    We need to configure the client to expect the OpenAI format.
-    In Continue this means setting "provider": "openai" in the config json file.
-    """
-
-    @property
-    @abstractmethod
-    def provider_format_funcs(self) -> Dict[str, Callable]:
-        """
-        Return the provider specific format functions. All providers format functions should
-        return the chunk in OpenAI format.
-        """
-        pass
-
-    def _clean_chunk(self, chunk: str) -> str:
-        """Clean the chunk from the "data:" and any extra characters."""
-        # Find the first position of 'data:' and add 5 characters to skip 'data:'
-        start_pos = chunk.find("data:") + 5
-        cleaned_chunk = chunk[start_pos:].strip()
-        return cleaned_chunk
-
-    def _format_openai(self, chunk: str) -> str:
-        """
-        The chunk is already in OpenAI format. To standarize remove the "data:" prefix.
-
-        This function is used by both chat and FIM formatters
-        """
-        return self._clean_chunk(chunk)
-
-    def _format_antropic(self, chunk: str) -> str:
-        """
-        Format the Anthropic chunk to OpenAI format.
-
-        This function is used by both chat and FIM formatters
-        """
-        cleaned_chunk = self._clean_chunk(chunk)
-        try:
-            # Use `strict=False` to allow the JSON payload to contain
-            # newlines, tabs and other valid characters that might
-            # come from Anthropic returning code.
-            chunk_dict = json.loads(cleaned_chunk, strict=False)
-        except Exception as e:
-            logger.warning(f"Error parsing Anthropic chunk: {chunk}. Error: {e}")
-            return cleaned_chunk.strip()
-
-        msg_type = chunk_dict.get("type", "")
-
-        finish_reason = None
-        if msg_type == "message_stop":
-            finish_reason = "stop"
-
-        # In type == "content_block_start" the content comes in "content_block"
-        # In type == "content_block_delta" the content comes in "delta"
-        msg_content_dict = chunk_dict.get("delta", {}) or chunk_dict.get("content_block", {})
-        # We couldn't obtain the content from the chunk. Skip it.
-        if not msg_content_dict:
-            return ""
-        msg_content = msg_content_dict.get("text", "")
-
-        open_ai_chunk = ModelResponse(
-            id=f"anthropic-chat-{str(uuid.uuid4())}",
-            model="anthropic-muxed-model",
-            object="chat.completion.chunk",
-            choices=[
-                StreamingChoices(
-                    finish_reason=finish_reason,
-                    index=0,
-                    delta=Delta(content=msg_content, role="assistant"),
-                    logprobs=None,
-                )
-            ],
-        )
-
-        try:
-            return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception as e:
-            logger.warning(f"Error serializing Anthropic chunk: {chunk}. Error: {e}")
-            return cleaned_chunk.strip()
-
-    def _format_as_openai_chunk(self, formatted_chunk: str) -> str:
-        """Format the chunk as OpenAI chunk. This is the format how the clients expect the data."""
-        chunk_to_send = f"data: {formatted_chunk}\n\n"
-        return chunk_to_send
-
-    async def _format_streaming_response(
-        self, response: StreamingResponse, dest_prov: db_models.ProviderType
-    ):
-        """Format the streaming response to OpenAI format."""
-        format_func = self.provider_format_funcs.get(dest_prov)
-        openai_chunk = None
-        try:
-            async for chunk in response.body_iterator:
-                openai_chunk = format_func(chunk)
-                # Sometimes for Anthropic we couldn't get content from the chunk. Skip it.
-                if not openai_chunk:
-                    continue
-                yield self._format_as_openai_chunk(openai_chunk)
-        except Exception as e:
-            logger.error(f"Error sending chunk in muxing: {e}")
-            yield self._format_as_openai_chunk(str(e))
-        finally:
-            # Make sure the last chunk is always [DONE]
-            if openai_chunk and "[DONE]" not in openai_chunk:
-                yield self._format_as_openai_chunk("[DONE]")
-
-    def format(
-        self, response: StreamingResponse, dest_prov: db_models.ProviderType
-    ) -> StreamingResponse:
-        """Format the response to the client."""
-        return StreamingResponse(
-            self._format_streaming_response(response, dest_prov),
-            status_code=response.status_code,
-            headers=response.headers,
-            background=response.background,
-            media_type=response.media_type,
-        )
-
-
-class ChatStreamChunkFormatter(StreamChunkFormatter):
-    """
-    Format a single chunk from a stream to OpenAI format given that the request was a chat.
-    """
-
-    @property
-    def provider_format_funcs(self) -> Dict[str, Callable]:
-        """
-        Return the provider specific format functions. All providers format functions should
-        return the chunk in OpenAI format.
-        """
-        return {
-            db_models.ProviderType.ollama: self._format_ollama,
-            db_models.ProviderType.openai: self._format_openai,
-            db_models.ProviderType.anthropic: self._format_antropic,
-            # Our Lllamacpp provider emits OpenAI chunks
-            db_models.ProviderType.llamacpp: self._format_openai,
-            # OpenRouter is a dialect of OpenAI
-            db_models.ProviderType.openrouter: self._format_openai,
-            # VLLM is a dialect of OpenAI
-            db_models.ProviderType.vllm: self._format_openai,
-        }
-
-    def _format_ollama(self, chunk: str) -> str:
-        """Format the Ollama chunk to OpenAI format."""
-        try:
-            chunk_dict = json.loads(chunk)
-            ollama_chunk = OllamaStreamingChatCompletion.model_validate(chunk_dict)
-            open_ai_chunk = openai_chunk_from_ollama_chat(ollama_chunk)
-            return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception as e:
-            # Sometimes we receive an OpenAI formatted chunk from ollama. Specifically when
-            # talking to Cline or Kodu. If that's the case we use the format_openai function.
-            if "data:" in chunk:
-                return self._format_openai(chunk)
-            logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
-            return chunk
-
-
-class FimStreamChunkFormatter(StreamChunkFormatter):
-
-    @property
-    def provider_format_funcs(self) -> Dict[str, Callable]:
-        """
-        Return the provider specific format functions. All providers format functions should
-        return the chunk in OpenAI format.
-        """
-        return {
-            db_models.ProviderType.ollama: self._format_ollama,
-            db_models.ProviderType.openai: self._format_openai,
-            # Our Lllamacpp provider emits OpenAI chunks
-            db_models.ProviderType.llamacpp: self._format_openai,
-            # OpenRouter is a dialect of OpenAI
-            db_models.ProviderType.openrouter: self._format_openai,
-            # VLLM is a dialect of OpenAI
-            db_models.ProviderType.vllm: self._format_openai,
-            db_models.ProviderType.anthropic: self._format_antropic,
-        }
-
-    def _format_ollama(self, chunk: str) -> str:
-        """Format the Ollama chunk to OpenAI format."""
-        try:
-            chunk_dict = json.loads(chunk)
-            ollama_chunk = OllamaStreamingGenerateCompletion.model_validate(chunk_dict)
-            open_ai_chunk = openai_chunk_from_ollama_generate(ollama_chunk)
-            return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-        except Exception as e:
-            print("Error formatting Ollama chunk: ", chunk, e)
-            return chunk
-
-
-class ResponseAdapter:
-
-    def _get_formatter(
-        self, response: Union[StreamingResponse, JSONResponse], is_fim_request: bool
-    ) -> OutputFormatter:
-        """Get the formatter based on the request type."""
-        if isinstance(response, StreamingResponse):
-            if is_fim_request:
-                return FimStreamChunkFormatter()
-            return ChatStreamChunkFormatter()
-        raise MuxingAdapterError("Only streaming responses are supported.")
-
-    def format_response_to_client(
-        self,
-        response: Union[StreamingResponse, JSONResponse],
-        dest_prov: db_models.ProviderType,
-        is_fim_request: bool,
-    ) -> Union[StreamingResponse, JSONResponse]:
-        """Format the response to the client."""
-        stream_formatter = self._get_formatter(response, is_fim_request)
-        return stream_formatter.format(response, dest_prov)
+def get_destination_info(model_route: rulematcher.ModelRoute) -> dict:
+    """Set the destination provider info."""
+    return model_route.model.name, get_provider_formatted_url(model_route)
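
The net effect of this file's change: the `BodyAdapter`/formatter class hierarchy is replaced by two module-level functions, and the provider-to-URL mapping is the only logic that survives. A minimal standalone sketch of that mapping, using plain strings instead of codegate's `ProviderType` enum and `ModelRoute` object (the function and argument names below are illustrative, not codegate's API):

from urllib.parse import urljoin

# Illustrative sketch of the mapping implemented by get_provider_formatted_url();
# codegate passes a ModelRoute and compares db_models.ProviderType members.
def provider_base_url(provider_type: str, endpoint: str) -> str:
    if provider_type in ("openai", "vllm"):
        return urljoin(endpoint, "/v1")      # OpenAI-compatible API root
    if provider_type == "openrouter":
        return urljoin(endpoint, "/api/v1")  # OpenRouter's dialect lives under /api/v1
    if provider_type == "llamacpp":
        return "./codegate_volume/models"    # local models folder (default), not a URL
    return endpoint                          # anthropic, ollama, etc.: use the stored endpoint as-is

# Example: an OpenAI-style endpoint resolves to its /v1 root.
assert provider_base_url("openai", "https://llm.example.com/") == "https://llm.example.com/v1"

Callers then consume the (model name, base_url) pair returned by get_destination_info(), as the router change below shows.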

src/codegate/muxing/router.py

+2-4
@@ -9,7 +9,7 @@
 from codegate.db.models import ProviderType
 from codegate.muxing import models as mux_models
 from codegate.muxing import rulematcher
-from codegate.muxing.adapter import BodyAdapter, ResponseAdapter
+from codegate.muxing.adapter import get_destination_info
 from codegate.providers.fim_analyzer import FIMAnalyzer
 from codegate.providers.registry import ProviderRegistry
 from codegate.types import anthropic, ollama, openai
@@ -39,11 +39,9 @@ class MuxRouter:
 
     def __init__(self, provider_registry: ProviderRegistry):
        self._ws_crud = WorkspaceCrud()
-        self._body_adapter = BodyAdapter()
        self.router = APIRouter()
        self._setup_routes()
        self._provider_registry = provider_registry
-        self._response_adapter = ResponseAdapter()
 
     @property
     def route_name(self) -> str:
@@ -128,7 +126,7 @@ async def route_to_dest_provider(
 
            # 2. Map the request body to the destination provider format.
            rest_of_path = self._ensure_path_starts_with_slash(rest_of_path)
-            model, base_url = self._body_adapter.get_destination_info(model_route)
+            model, base_url = get_destination_info(model_route)
 
            # 3. Run pipeline. Selecting the correct destination provider.
            provider = self._provider_registry.get_provider(model_route.endpoint.provider_type)

0 commit comments








