feat: Implement AI Chat Completions API with streaming support and models list endpoint
- Enhanced the AI Chat Completions API to support true streaming using async generators and proper SSE headers.
- Updated endpoint paths to align with OpenAI's API versioning.
- Improved logging for request details and error handling.
- Added a new AI Models List API that returns the available models compatible with chat completions.
- Refactored code for better readability and maintainability, including the extraction of common functionality.
- Introduced a VMH-specific Chat Completions API with similar features and structure.
This commit is contained in:
124
steps/ai/models_list_api_step.py
Normal file
124
steps/ai/models_list_api_step.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""AI Models List API
|
||||
|
||||
OpenAI-compatible models list endpoint for OpenWebUI and other clients.
|
||||
Returns all available AI models that can be used with /ai/chat/completions.
|
||||
"""
|
||||
import time
|
||||
from typing import Any
|
||||
from motia import FlowContext, http, ApiRequest, ApiResponse
|
||||
|
||||
|
||||
config = {
|
||||
"name": "AI Models List API",
|
||||
"description": "OpenAI-compatible models endpoint - lists available AI models",
|
||||
"flows": ["ai-general"],
|
||||
"triggers": [
|
||||
http("GET", "/ai/v1/models"),
|
||||
http("GET", "/v1/models"),
|
||||
http("GET", "/ai/models")
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _model_entry(model_id: str, created: int,
                 extra_capabilities: dict | None = None) -> dict:
    """Build one OpenAI-compatible model record.

    Args:
        model_id: Model identifier exposed to clients (also used as ``root``).
        created: Unix timestamp reported as the model's creation time.
        extra_capabilities: Optional capability flags merged on top of the
            base set (file_search / web_search / streaming / reasoning).

    Returns:
        A dict matching the OpenAI ``/v1/models`` item schema, plus a
        non-standard ``capabilities`` field used by this API.
    """
    capabilities = {
        "file_search": True,
        "web_search": True,
        "streaming": True,
        "reasoning": True,
    }
    if extra_capabilities:
        capabilities.update(extra_capabilities)
    return {
        "id": model_id,
        "object": "model",
        "created": created,
        "owned_by": "xai",
        "permission": [],
        "root": model_id,
        "parent": None,
        "capabilities": capabilities,
    }


async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
    """
    OpenAI-compatible models list endpoint.

    Returns the list of models supported by /ai/chat/completions, for
    OpenWebUI and other OpenAI-style clients.

    Response format (OpenAI compatible):
        {
            "object": "list",
            "data": [
                {
                    "id": "grok-4.20-beta-0309-reasoning",
                    "object": "model",
                    "created": 1735689600,
                    "owned_by": "xai",
                    "permission": [],
                    "root": "grok-4.20-beta-0309-reasoning",
                    "parent": null,
                    "capabilities": {...}
                },
                ...
            ]
        }

    On failure, responds 500 with an OpenAI-style error envelope
    ``{"error": {"message": ..., "type": "server_error"}}``.
    """
    ctx.logger.info("📋 Models list requested")

    try:
        # All entries share one "created" timestamp; clients treat it as
        # informational only, so request time is acceptable here.
        current_timestamp = int(time.time())

        # Models below must stay in sync with what /ai/chat/completions accepts.
        models = [
            _model_entry("grok-4.20-beta-0309-reasoning", current_timestamp),
            _model_entry("grok-4.20-multi-agent-beta-0309", current_timestamp,
                         {"multi_agent": True}),
            _model_entry("grok-4-1-fast-reasoning", current_timestamp),
        ]

        # Build OpenAI-compatible list envelope.
        response_body = {
            "object": "list",
            "data": models,
        }

        ctx.logger.info(f"✅ Returned {len(models)} models")

        return ApiResponse(
            status=200,
            body=response_body,
        )

    except Exception as e:
        ctx.logger.error(f"❌ Error listing models: {e}", exc_info=True)
        return ApiResponse(
            status=500,
            body={
                "error": {
                    "message": str(e),
                    "type": "server_error",
                }
            },
        )
|
||||
Reference in New Issue
Block a user