- import json
- import uuid
- from abc import ABC, abstractmethod
- from typing import Callable, Dict, Union
from urllib.parse import urljoin

import structlog
- from fastapi.responses import JSONResponse, StreamingResponse
- from litellm import ModelResponse
- from litellm.types.utils import Delta, StreamingChoices

from codegate.config import Config
from codegate.db import models as db_models
from codegate.muxing import rulematcher
- from codegate.muxing.ollama_mappers import (
-     openai_chunk_from_ollama_chat,
-     openai_chunk_from_ollama_generate,
- )
- from codegate.types.ollama import StreamingChatCompletion as OllamaStreamingChatCompletion
- from codegate.types.ollama import StreamingGenerateCompletion as OllamaStreamingGenerateCompletion

logger = structlog.get_logger("codegate")

@@ -35,260 +22,20 @@ def get_llamacpp_models_folder():
    return override if override else "./codegate_volume/models"


- class BodyAdapter:
-     """
-     Format the body to the destination provider format.
+ def get_provider_formatted_url(model_route: rulematcher.ModelRoute) -> str:
+     """Get the provider formatted URL to use in base_url. Note this value comes from DB"""
+     if model_route.endpoint.provider_type in [
+         db_models.ProviderType.openai,
+         db_models.ProviderType.vllm,
+     ]:
+         return urljoin(model_route.endpoint.endpoint, "/v1")
+     if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
+         return urljoin(model_route.endpoint.endpoint, "/api/v1")
+     if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
+         return get_llamacpp_models_folder()
+     return model_route.endpoint.endpoint

-     We expect the body to always be in OpenAI format. We need to configure the client
-     to send and expect OpenAI format. Here we just need to set the destination provider info.
-     """

-     def _get_provider_formatted_url(self, model_route: rulematcher.ModelRoute) -> str:
-         """Get the provider formatted URL to use in base_url. Note this value comes from DB"""
-         if model_route.endpoint.provider_type in [
-             db_models.ProviderType.openai,
-             db_models.ProviderType.vllm,
-         ]:
-             return urljoin(model_route.endpoint.endpoint, "/v1")
-         if model_route.endpoint.provider_type == db_models.ProviderType.openrouter:
-             return urljoin(model_route.endpoint.endpoint, "/api/v1")
-         if model_route.endpoint.provider_type == db_models.ProviderType.llamacpp:
-             return get_llamacpp_models_folder()
-         return model_route.endpoint.endpoint
-
-     def get_destination_info(self, model_route: rulematcher.ModelRoute) -> dict:
-         """Set the destination provider info."""
-         return model_route.model.name, self._get_provider_formatted_url(model_route)
-
-
- class OutputFormatter(ABC):
-
-     @property
-     @abstractmethod
-     def provider_format_funcs(self) -> Dict[str, Callable]:
-         """
-         Return the provider-specific format functions. All providers' format functions should
-         return the chunk in OpenAI format.
-         """
-         pass
-
-     @abstractmethod
-     def format(
-         self, response: Union[StreamingResponse, JSONResponse], dest_prov: db_models.ProviderType
-     ) -> Union[StreamingResponse, JSONResponse]:
-         """Format the response to the client."""
-         pass
-
-
- class StreamChunkFormatter(OutputFormatter):
-     """
-     Format a single chunk from a stream to OpenAI format.
-     We need to configure the client to expect the OpenAI format.
-     In Continue this means setting "provider": "openai" in the config JSON file.
-     """
-
-     @property
-     @abstractmethod
-     def provider_format_funcs(self) -> Dict[str, Callable]:
-         """
-         Return the provider-specific format functions. All providers' format functions should
-         return the chunk in OpenAI format.
-         """
-         pass
-
-     def _clean_chunk(self, chunk: str) -> str:
-         """Strip the "data:" prefix and any extra characters from the chunk."""
-         # Find the first position of 'data:' and add 5 characters to skip past it
-         start_pos = chunk.find("data:") + 5
-         cleaned_chunk = chunk[start_pos:].strip()
-         return cleaned_chunk
-
-     def _format_openai(self, chunk: str) -> str:
-         """
-         The chunk is already in OpenAI format. To standardize, remove the "data:" prefix.
-
-         This function is used by both chat and FIM formatters.
-         """
-         return self._clean_chunk(chunk)
-
-     def _format_antropic(self, chunk: str) -> str:
-         """
-         Format the Anthropic chunk to OpenAI format.
-
-         This function is used by both chat and FIM formatters.
-         """
-         cleaned_chunk = self._clean_chunk(chunk)
-         try:
-             # Use `strict=False` to allow the JSON payload to contain
-             # newlines, tabs and other valid characters that might
-             # come from Anthropic returning code.
-             chunk_dict = json.loads(cleaned_chunk, strict=False)
-         except Exception as e:
-             logger.warning(f"Error parsing Anthropic chunk: {chunk}. Error: {e}")
-             return cleaned_chunk.strip()
-
-         msg_type = chunk_dict.get("type", "")
-
-         finish_reason = None
-         if msg_type == "message_stop":
-             finish_reason = "stop"
-
-         # In type == "content_block_start" the content comes in "content_block"
-         # In type == "content_block_delta" the content comes in "delta"
-         msg_content_dict = chunk_dict.get("delta", {}) or chunk_dict.get("content_block", {})
-         # We couldn't obtain the content from the chunk. Skip it.
-         if not msg_content_dict:
-             return ""
-         msg_content = msg_content_dict.get("text", "")
-
-         open_ai_chunk = ModelResponse(
-             id=f"anthropic-chat-{str(uuid.uuid4())}",
-             model="anthropic-muxed-model",
-             object="chat.completion.chunk",
-             choices=[
-                 StreamingChoices(
-                     finish_reason=finish_reason,
-                     index=0,
-                     delta=Delta(content=msg_content, role="assistant"),
-                     logprobs=None,
-                 )
-             ],
-         )
-
-         try:
-             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-         except Exception as e:
-             logger.warning(f"Error serializing Anthropic chunk: {chunk}. Error: {e}")
-             return cleaned_chunk.strip()
-
-     def _format_as_openai_chunk(self, formatted_chunk: str) -> str:
-         """Format the chunk as an OpenAI chunk. This is the format the clients expect."""
-         chunk_to_send = f"data: {formatted_chunk}\n\n"
-         return chunk_to_send
-
-     async def _format_streaming_response(
-         self, response: StreamingResponse, dest_prov: db_models.ProviderType
-     ):
-         """Format the streaming response to OpenAI format."""
-         format_func = self.provider_format_funcs.get(dest_prov)
-         openai_chunk = None
-         try:
-             async for chunk in response.body_iterator:
-                 openai_chunk = format_func(chunk)
-                 # Sometimes for Anthropic we couldn't get content from the chunk. Skip it.
-                 if not openai_chunk:
-                     continue
-                 yield self._format_as_openai_chunk(openai_chunk)
-         except Exception as e:
-             logger.error(f"Error sending chunk in muxing: {e}")
-             yield self._format_as_openai_chunk(str(e))
-         finally:
-             # Make sure the last chunk is always [DONE]
-             if openai_chunk and "[DONE]" not in openai_chunk:
-                 yield self._format_as_openai_chunk("[DONE]")
-
-     def format(
-         self, response: StreamingResponse, dest_prov: db_models.ProviderType
-     ) -> StreamingResponse:
-         """Format the response to the client."""
-         return StreamingResponse(
-             self._format_streaming_response(response, dest_prov),
-             status_code=response.status_code,
-             headers=response.headers,
-             background=response.background,
-             media_type=response.media_type,
-         )
-
-
- class ChatStreamChunkFormatter(StreamChunkFormatter):
-     """
-     Format a single chunk from a stream to OpenAI format given that the request was a chat.
-     """
-
-     @property
-     def provider_format_funcs(self) -> Dict[str, Callable]:
-         """
-         Return the provider-specific format functions. All providers' format functions should
-         return the chunk in OpenAI format.
-         """
-         return {
-             db_models.ProviderType.ollama: self._format_ollama,
-             db_models.ProviderType.openai: self._format_openai,
-             db_models.ProviderType.anthropic: self._format_antropic,
-             # Our llamacpp provider emits OpenAI chunks
-             db_models.ProviderType.llamacpp: self._format_openai,
-             # OpenRouter is a dialect of OpenAI
-             db_models.ProviderType.openrouter: self._format_openai,
-             # vLLM is a dialect of OpenAI
-             db_models.ProviderType.vllm: self._format_openai,
-         }
-
-     def _format_ollama(self, chunk: str) -> str:
-         """Format the Ollama chunk to OpenAI format."""
-         try:
-             chunk_dict = json.loads(chunk)
-             ollama_chunk = OllamaStreamingChatCompletion.model_validate(chunk_dict)
-             open_ai_chunk = openai_chunk_from_ollama_chat(ollama_chunk)
-             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-         except Exception as e:
-             # Sometimes we receive an OpenAI-formatted chunk from Ollama, specifically when
-             # talking to Cline or Kodu. If that's the case, use _format_openai instead.
-             if "data:" in chunk:
-                 return self._format_openai(chunk)
-             logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
-             return chunk
-
-
- class FimStreamChunkFormatter(StreamChunkFormatter):
-
-     @property
-     def provider_format_funcs(self) -> Dict[str, Callable]:
-         """
-         Return the provider-specific format functions. All providers' format functions should
-         return the chunk in OpenAI format.
-         """
-         return {
-             db_models.ProviderType.ollama: self._format_ollama,
-             db_models.ProviderType.openai: self._format_openai,
-             # Our llamacpp provider emits OpenAI chunks
-             db_models.ProviderType.llamacpp: self._format_openai,
-             # OpenRouter is a dialect of OpenAI
-             db_models.ProviderType.openrouter: self._format_openai,
-             # vLLM is a dialect of OpenAI
-             db_models.ProviderType.vllm: self._format_openai,
-             db_models.ProviderType.anthropic: self._format_antropic,
-         }
-
-     def _format_ollama(self, chunk: str) -> str:
-         """Format the Ollama chunk to OpenAI format."""
-         try:
-             chunk_dict = json.loads(chunk)
-             ollama_chunk = OllamaStreamingGenerateCompletion.model_validate(chunk_dict)
-             open_ai_chunk = openai_chunk_from_ollama_generate(ollama_chunk)
-             return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
-         except Exception as e:
-             logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
-             return chunk
-
-
- class ResponseAdapter:
-
-     def _get_formatter(
-         self, response: Union[StreamingResponse, JSONResponse], is_fim_request: bool
-     ) -> OutputFormatter:
-         """Get the formatter based on the request type."""
-         if isinstance(response, StreamingResponse):
-             if is_fim_request:
-                 return FimStreamChunkFormatter()
-             return ChatStreamChunkFormatter()
-         raise MuxingAdapterError("Only streaming responses are supported.")
-
-     def format_response_to_client(
-         self,
-         response: Union[StreamingResponse, JSONResponse],
-         dest_prov: db_models.ProviderType,
-         is_fim_request: bool,
-     ) -> Union[StreamingResponse, JSONResponse]:
-         """Format the response to the client."""
-         stream_formatter = self._get_formatter(response, is_fim_request)
-         return stream_formatter.format(response, dest_prov)
+ def get_destination_info(model_route: rulematcher.ModelRoute) -> tuple[str, str]:
+     """Return the destination provider info: the model name and the formatted base URL."""
+     return model_route.model.name, get_provider_formatted_url(model_route)
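
The new `get_provider_formatted_url` leans on a subtlety of `urljoin`: when the second argument starts with `/`, it replaces the base URL's entire path rather than appending to it. A minimal standalone sketch of this behavior (the endpoint URL here is hypothetical, not from the commit):

```python
from urllib.parse import urljoin

# A leading "/" replaces the whole path of the base URL.
print(urljoin("https://llm.example.com/custom/path", "/v1"))
# -> https://llm.example.com/v1

# Without the leading "/", only the last path segment is replaced.
print(urljoin("https://llm.example.com/custom/path", "api/v1"))
# -> https://llm.example.com/custom/api/v1
```

In practice this normalizes an OpenAI or vLLM endpoint stored with any path in the DB to end in `/v1`, and an OpenRouter endpoint to end in `/api/v1`.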
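
With `BodyAdapter` gone, callers invoke the module-level function directly instead of instantiating a class. A hedged usage sketch, assuming `model_route` is a `rulematcher.ModelRoute` obtained from the rule matcher elsewhere in the muxer (variable names are illustrative):

```python
# Sketch: resolve the destination model and base URL for a matched route.
model_name, base_url = get_destination_info(model_route)

# The muxer can then point an OpenAI-compatible client at base_url and
# request model_name. Note that for llamacpp, base_url is a local models
# folder (e.g. "./codegate_volume/models") rather than an HTTP URL.
```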