feat: Implement AI Chat Completions API with streaming support and models list endpoint
- Enhanced the AI Chat Completions API to support true streaming using async generators and proper SSE headers.
- Updated endpoint paths to align with OpenAI's API versioning.
- Improved logging for request details and error handling.
- Added a new AI Models List API that returns the available models compatible with chat completions.
- Refactored code for better readability and maintainability, including the extraction of common functionality.
- Introduced a VMH-specific Chat Completions API with similar features and structure.
This commit is contained in:
124
steps/ai/models_list_api_step.py
Normal file
124
steps/ai/models_list_api_step.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""AI Models List API
|
||||
|
||||
OpenAI-compatible models list endpoint for OpenWebUI and other clients.
|
||||
Returns all available AI models that can be used with /ai/chat/completions.
|
||||
"""
|
||||
import time
|
||||
from typing import Any
|
||||
from motia import FlowContext, http, ApiRequest, ApiResponse
|
||||
|
||||
|
||||
config = {
|
||||
"name": "AI Models List API",
|
||||
"description": "OpenAI-compatible models endpoint - lists available AI models",
|
||||
"flows": ["ai-general"],
|
||||
"triggers": [
|
||||
http("GET", "/ai/v1/models"),
|
||||
http("GET", "/v1/models"),
|
||||
http("GET", "/ai/models")
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _model_entry(model_id: str, created: int,
                 extra_capabilities: dict | None = None) -> dict:
    """Build one OpenAI-compatible model record.

    Args:
        model_id: Model identifier exposed to clients (also used as ``root``).
        created: Unix timestamp reported as the model's creation time.
        extra_capabilities: Optional capability flags merged on top of the
            base set (file_search / web_search / streaming / reasoning).

    Returns:
        A dict matching the OpenAI ``/v1/models`` item schema, plus a
        non-standard ``capabilities`` field used by this API.
    """
    capabilities = {
        "file_search": True,
        "web_search": True,
        "streaming": True,
        "reasoning": True,
    }
    if extra_capabilities:
        capabilities.update(extra_capabilities)
    return {
        "id": model_id,
        "object": "model",
        "created": created,
        "owned_by": "xai",
        "permission": [],
        "root": model_id,
        "parent": None,
        "capabilities": capabilities,
    }


async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
    """
    OpenAI-compatible models list endpoint.

    Returns the list of models supported by /ai/chat/completions, for
    OpenWebUI and other OpenAI-style clients.

    Response format (OpenAI compatible):
        {
            "object": "list",
            "data": [
                {
                    "id": "grok-4.20-beta-0309-reasoning",
                    "object": "model",
                    "created": 1735689600,
                    "owned_by": "xai",
                    "permission": [],
                    "root": "grok-4.20-beta-0309-reasoning",
                    "parent": null,
                    "capabilities": {...}
                },
                ...
            ]
        }

    On failure, responds 500 with an OpenAI-style error envelope
    ``{"error": {"message": ..., "type": "server_error"}}``.
    """
    ctx.logger.info("📋 Models list requested")

    try:
        # All entries share one "created" timestamp; clients treat it as
        # informational only, so request time is acceptable here.
        current_timestamp = int(time.time())

        # Models below must stay in sync with what /ai/chat/completions accepts.
        models = [
            _model_entry("grok-4.20-beta-0309-reasoning", current_timestamp),
            _model_entry("grok-4.20-multi-agent-beta-0309", current_timestamp,
                         {"multi_agent": True}),
            _model_entry("grok-4-1-fast-reasoning", current_timestamp),
        ]

        # Build OpenAI-compatible list envelope.
        response_body = {
            "object": "list",
            "data": models,
        }

        ctx.logger.info(f"✅ Returned {len(models)} models")

        return ApiResponse(
            status=200,
            body=response_body,
        )

    except Exception as e:
        ctx.logger.error(f"❌ Error listing models: {e}", exc_info=True)
        return ApiResponse(
            status=500,
            body={
                "error": {
                    "message": str(e),
                    "type": "server_error",
                }
            },
        )
|
||||
Reference in New Issue
Block a user