fix wrong para position for the newly added params for AOAI (#30849)

yuanzhuangyuanzhuang · Yuan Zhuang · shrgupta773 · web-flow · commit b26a190235f1 · 2024-10-04T10:25:58.000-07:00
* fix wrong para position for the newly added params

* Add parallel_tool_calls to assistant api

* bug fix

---------

Co-authored-by: Yuan Zhuang <yuzhua@microsoft.com>
Co-authored-by: Shruti Gupta <shrgupta@microsoft.com>
diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-09-01-preview/inference.json b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-09-01-preview/inference.json
@@ -3739,6 +3739,10 @@
                 "nullable": true,
                 "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n"
               },
+              "parallel_tool_calls": {
+                "$ref": "#/components/schemas/ParallelToolCalls",
+                "nullable": true
+              },
               "response_format": {
                 "description": "An object specifying the format that the model must output. Compatible with [GPT-4o](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-models), [GPT-4o mini](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-models), [GPT-4 Turbo](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-and-gpt-4-turbo-models) and all [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-35) Turbo models newer than `gpt-3.5-turbo-1106`.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.\n",
                 "oneOf": [
@@ -3785,6 +3789,10 @@
                 "nullable": true,
                 "default": false
               },
+              "stream_options": {
+                "$ref": "#/components/schemas/chatCompletionStreamOptions",
+                "nullable": true
+              },
               "temperature": {
                 "type": "number",
                 "minimum": 0,
@@ -8202,10 +8210,6 @@
             "nullable": true,
             "description": "If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.\n"
           },
-          "stream_options": {
-            "$ref": "#/components/schemas/chatCompletionStreamOptions",
-            "nullable": true
-          },
           "max_prompt_tokens": {
             "type": "integer",
             "nullable": true,
@@ -8226,6 +8230,9 @@
             "$ref": "#/components/schemas/assistantsApiToolChoiceOption",
             "nullable": true
           },
+          "parallel_tool_calls": {
+            "$ref": "#/components/schemas/ParallelToolCalls"
+          },
           "response_format": {
             "$ref": "#/components/schemas/assistantsApiResponseFormatOption",
             "nullable": true
@@ -8486,6 +8493,9 @@
             "$ref": "#/components/schemas/assistantsApiToolChoiceOption",
             "nullable": true
           },
+          "parallel_tool_calls": {
+            "$ref": "#/components/schemas/ParallelToolCalls"
+          },
           "response_format": {
             "$ref": "#/components/schemas/assistantsApiResponseFormatOption",
             "nullable": true
diff --git a/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-09-01-preview/inference.yaml b/specification/cognitiveservices/data-plane/AzureOpenAI/inference/preview/2024-09-01-preview/inference.yaml
@@ -2488,6 +2488,8 @@ components:
               example: 1
               nullable: true
               description: How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.
+            parallel_tool_calls:
+              $ref: "#/components/schemas/ParallelToolCalls"
             presence_penalty:
               type: number
               default: 0
@@ -5723,6 +5725,8 @@ components:
         tool_choice:
           $ref: "#/components/schemas/assistantsApiToolChoiceOption"
           nullable: true
+        parallel_tool_calls:
+          $ref: "#/components/schemas/ParallelToolCalls"
         response_format:
           $ref: "#/components/schemas/assistantsApiResponseFormatOption"
           nullable: true
@@ -5922,6 +5926,8 @@ components:
         tool_choice:
           $ref: "#/components/schemas/assistantsApiToolChoiceOption"
           nullable: true
+        parallel_tool_calls:
+          $ref: "#/components/schemas/ParallelToolCalls"
         response_format:
           $ref: "#/components/schemas/assistantsApiResponseFormatOption"
           nullable: true