feat: Implement AI Chat Completions API with streaming support and models list endpoint
- Enhanced the AI Chat Completions API to support true streaming using async generators and proper SSE headers.
- Updated endpoint paths to align with OpenAI's API versioning.
- Improved logging for request details and error handling.
- Added a new AI Models List API to return available models compatible with chat completions.
- Refactored code for better readability and maintainability, including the extraction of common functionalities.
- Introduced a VMH-specific Chat Completions API with similar features and structure.
This commit is contained in:
@@ -1,101 +1,53 @@
|
|||||||
"""AI Chat Completions API
|
"""AI Chat Completions API
|
||||||
|
|
||||||
Universal OpenAI-compatible Chat Completions API with xAI/LangChain Backend.
|
Universal OpenAI-compatible Chat Completions API with xAI/LangChain Backend.
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- File Search (RAG) via xAI Collections
|
- File Search (RAG) via xAI Collections
|
||||||
- Web Search via xAI web_search tool
|
- Web Search via xAI web_search tool
|
||||||
- Aktenzeichen-based automatic collection lookup
|
- Aktenzeichen-based automatic collection lookup
|
||||||
- Streaming & Non-Streaming support
|
- **True streaming** (async generator + proper SSE headers)
|
||||||
- Multiple tools simultaneously (file_search + web_search)
|
- Multiple tools simultaneously
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from motia import FlowContext, http, ApiRequest, ApiResponse
|
from motia import FlowContext, http, ApiRequest, ApiResponse
|
||||||
|
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
"name": "AI Chat Completions API",
|
"name": "AI Chat Completions API",
|
||||||
"description": "Universal OpenAI-compatible Chat Completions API with xAI backend, RAG, and web search",
|
"description": "Universal OpenAI-compatible Chat Completions API with xAI backend, RAG, and web search",
|
||||||
"flows": ["ai-general"],
|
"flows": ["ai-general"],
|
||||||
"triggers": [
|
"triggers": [
|
||||||
http("POST", "/ai/chat/completions")
|
http("POST", "/ai/v1/chat/completions"),
|
||||||
|
http("POST", "/v1/chat/completions")
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
|
async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
|
||||||
"""
|
"""
|
||||||
OpenAI-compatible Chat Completions endpoint.
|
OpenAI-compatible Chat Completions endpoint with **true** streaming.
|
||||||
|
|
||||||
Request Body (OpenAI format):
|
|
||||||
{
|
|
||||||
"model": "grok-4.20-beta-0309-reasoning",
|
|
||||||
"messages": [
|
|
||||||
{"role": "system", "content": "You are helpful"},
|
|
||||||
{"role": "user", "content": "1234/56 Was ist der Stand?"}
|
|
||||||
],
|
|
||||||
"temperature": 0.7,
|
|
||||||
"max_tokens": 2000,
|
|
||||||
"stream": false,
|
|
||||||
"extra_body": {
|
|
||||||
"collection_id": "col_abc123", // Optional: override auto-detection
|
|
||||||
"enable_web_search": true, // Optional: enable web search (default: false)
|
|
||||||
"web_search_config": { // Optional: web search configuration
|
|
||||||
"allowed_domains": ["example.com"],
|
|
||||||
"excluded_domains": ["spam.com"],
|
|
||||||
"enable_image_understanding": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Aktenzeichen-Erkennung (Priority):
|
|
||||||
1. extra_body.collection_id (explicit override)
|
|
||||||
2. First user message starts with Aktenzeichen (e.g., "1234/56 ...")
|
|
||||||
3. Web-only mode if no collection_id (must enable_web_search)
|
|
||||||
|
|
||||||
Response (OpenAI format):
|
|
||||||
Non-Streaming:
|
|
||||||
{
|
|
||||||
"id": "chatcmpl-...",
|
|
||||||
"object": "chat.completion",
|
|
||||||
"created": 1234567890,
|
|
||||||
"model": "grok-4.20-beta-0309-reasoning",
|
|
||||||
"choices": [{
|
|
||||||
"index": 0,
|
|
||||||
"message": {"role": "assistant", "content": "..."},
|
|
||||||
"finish_reason": "stop"
|
|
||||||
}],
|
|
||||||
"usage": {"prompt_tokens": X, "completion_tokens": Y, "total_tokens": Z}
|
|
||||||
}
|
|
||||||
|
|
||||||
Streaming (SSE):
|
|
||||||
data: {"id":"chatcmpl-...","choices":[{"delta":{"content":"Hello"},...}]}
|
|
||||||
data: {"id":"chatcmpl-...","choices":[{"delta":{"content":" world"},...}]}
|
|
||||||
data: {"choices":[{"delta":{},"finish_reason":"stop"}]}
|
|
||||||
data: [DONE]
|
|
||||||
"""
|
"""
|
||||||
from services.langchain_xai_service import LangChainXAIService
|
ctx.logger.info("=" * 80)
|
||||||
from services.aktenzeichen_utils import extract_aktenzeichen, normalize_aktenzeichen
|
ctx.logger.info("🤖 AI CHAT COMPLETIONS API – OPTIMIZED")
|
||||||
from services.espocrm import EspoCRMAPI
|
ctx.logger.info("=" * 80)
|
||||||
|
|
||||||
ctx.logger.info("=" * 80)
|
# Log request (sicher)
|
||||||
ctx.logger.info("🤖 AI CHAT COMPLETIONS API")
|
ctx.logger.info("📥 REQUEST DETAILS:")
|
||||||
ctx.logger.info("=" * 80)
|
if request.headers:
|
||||||
|
ctx.logger.info(" Headers:")
|
||||||
|
for header_name, header_value in request.headers.items():
|
||||||
|
if header_name.lower() == 'authorization':
|
||||||
|
ctx.logger.info(f" {header_name}: Bearer ***MASKED***")
|
||||||
|
else:
|
||||||
|
ctx.logger.info(f" {header_name}: {header_value}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Parse request body
|
# Parse body
|
||||||
body = request.body or {}
|
body = request.body or {}
|
||||||
|
|
||||||
if not isinstance(body, dict):
|
if not isinstance(body, dict):
|
||||||
ctx.logger.error(f"❌ Invalid request body type: {type(body)}")
|
return ApiResponse(status=400, body={'error': 'Request body must be JSON object'})
|
||||||
return ApiResponse(
|
|
||||||
status=400,
|
|
||||||
body={'error': 'Request body must be JSON object'}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract parameters
|
# Parameter extrahieren
|
||||||
model_name = body.get('model', 'grok-4.20-beta-0309-reasoning')
|
model_name = body.get('model', 'grok-4.20-beta-0309-reasoning')
|
||||||
messages = body.get('messages', [])
|
messages = body.get('messages', [])
|
||||||
temperature = body.get('temperature', 0.7)
|
temperature = body.get('temperature', 0.7)
|
||||||
@@ -103,102 +55,56 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
|
|||||||
stream = body.get('stream', False)
|
stream = body.get('stream', False)
|
||||||
extra_body = body.get('extra_body', {})
|
extra_body = body.get('extra_body', {})
|
||||||
|
|
||||||
# Web Search parameters (default: disabled)
|
enable_web_search = body.get('enable_web_search', extra_body.get('enable_web_search', False))
|
||||||
enable_web_search = extra_body.get('enable_web_search', False)
|
web_search_config = body.get('web_search_config', extra_body.get('web_search_config', {}))
|
||||||
web_search_config = extra_body.get('web_search_config', {})
|
|
||||||
|
|
||||||
ctx.logger.info(f"📋 Model: {model_name}")
|
ctx.logger.info(f"📋 Model: {model_name} | Stream: {stream} | Web Search: {enable_web_search}")
|
||||||
ctx.logger.info(f"📋 Messages: {len(messages)}")
|
|
||||||
ctx.logger.info(f"📋 Stream: {stream}")
|
|
||||||
ctx.logger.info(f"📋 Web Search: {'enabled' if enable_web_search else 'disabled'}")
|
|
||||||
if enable_web_search and web_search_config:
|
|
||||||
ctx.logger.debug(f"Web Search Config: {json.dumps(web_search_config, indent=2)}")
|
|
||||||
|
|
||||||
# Log full conversation messages
|
# Messages loggen (kurz)
|
||||||
ctx.logger.info("-" * 80)
|
ctx.logger.info("📨 MESSAGES:")
|
||||||
ctx.logger.info("📨 REQUEST MESSAGES:")
|
|
||||||
for i, msg in enumerate(messages, 1):
|
for i, msg in enumerate(messages, 1):
|
||||||
role = msg.get('role', 'unknown')
|
preview = (msg.get('content', '')[:120] + "...") if len(msg.get('content', '')) > 120 else msg.get('content', '')
|
||||||
content = msg.get('content', '')
|
ctx.logger.info(f" [{i}] {msg.get('role')}: {preview}")
|
||||||
preview = content[:150] + "..." if len(content) > 150 else content
|
|
||||||
ctx.logger.info(f" [{i}] {role}: {preview}")
|
|
||||||
ctx.logger.info("-" * 80)
|
|
||||||
|
|
||||||
# Validate messages
|
# === Collection + Aktenzeichen Logic (unverändert) ===
|
||||||
if not messages or not isinstance(messages, list):
|
|
||||||
ctx.logger.error("❌ Missing or invalid messages array")
|
|
||||||
return ApiResponse(
|
|
||||||
status=400,
|
|
||||||
body={'error': 'messages must be non-empty array'}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Determine collection_id (Priority: extra_body > Aktenzeichen > optional for web-only)
|
|
||||||
collection_id: Optional[str] = None
|
collection_id: Optional[str] = None
|
||||||
aktenzeichen: Optional[str] = None
|
aktenzeichen: Optional[str] = None
|
||||||
|
|
||||||
# Priority 1: Explicit collection_id in extra_body
|
if 'collection_id' in body:
|
||||||
if 'collection_id' in extra_body:
|
collection_id = body['collection_id']
|
||||||
|
elif 'custom_collection_id' in body:
|
||||||
|
collection_id = body['custom_collection_id']
|
||||||
|
elif 'collection_id' in extra_body:
|
||||||
collection_id = extra_body['collection_id']
|
collection_id = extra_body['collection_id']
|
||||||
ctx.logger.info(f"🔍 Collection ID from extra_body: {collection_id}")
|
|
||||||
|
|
||||||
# Priority 2: Extract Aktenzeichen from first user message
|
|
||||||
else:
|
else:
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
if msg.get('role') == 'user':
|
if msg.get('role') == 'user':
|
||||||
content = msg.get('content', '')
|
content = msg.get('content', '')
|
||||||
|
from services.aktenzeichen_utils import extract_aktenzeichen, normalize_aktenzeichen, remove_aktenzeichen
|
||||||
aktenzeichen_raw = extract_aktenzeichen(content)
|
aktenzeichen_raw = extract_aktenzeichen(content)
|
||||||
|
|
||||||
if aktenzeichen_raw:
|
if aktenzeichen_raw:
|
||||||
aktenzeichen = normalize_aktenzeichen(aktenzeichen_raw)
|
aktenzeichen = normalize_aktenzeichen(aktenzeichen_raw)
|
||||||
ctx.logger.info(f"🔍 Aktenzeichen detected: {aktenzeichen}")
|
collection_id = await lookup_collection_by_aktenzeichen(aktenzeichen, ctx)
|
||||||
|
|
||||||
# Lookup collection_id via EspoCRM
|
|
||||||
collection_id = await lookup_collection_by_aktenzeichen(
|
|
||||||
aktenzeichen, ctx
|
|
||||||
)
|
|
||||||
|
|
||||||
if collection_id:
|
if collection_id:
|
||||||
ctx.logger.info(f"✅ Collection found: {collection_id}")
|
|
||||||
|
|
||||||
# Remove Aktenzeichen from message (clean prompt)
|
|
||||||
from services.aktenzeichen_utils import remove_aktenzeichen
|
|
||||||
msg['content'] = remove_aktenzeichen(content)
|
msg['content'] = remove_aktenzeichen(content)
|
||||||
ctx.logger.debug(f"Cleaned message: {msg['content']}")
|
break
|
||||||
else:
|
|
||||||
ctx.logger.warn(f"⚠️ No collection found for {aktenzeichen}")
|
|
||||||
|
|
||||||
break # Only check first user message
|
|
||||||
|
|
||||||
# Priority 3: Error if no collection_id AND web_search disabled
|
|
||||||
if not collection_id and not enable_web_search:
|
if not collection_id and not enable_web_search:
|
||||||
ctx.logger.error("❌ No collection_id found and web_search disabled")
|
|
||||||
ctx.logger.error(" Provide collection_id, enable web_search, or both")
|
|
||||||
return ApiResponse(
|
return ApiResponse(
|
||||||
status=400,
|
status=400,
|
||||||
body={
|
body={'error': 'collection_id or web_search required'}
|
||||||
'error': 'collection_id or web_search required',
|
|
||||||
'message': 'Provide collection_id in extra_body, enable web_search, or start message with Aktenzeichen (e.g., "1234/56 question")'
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize LangChain xAI Service
|
# === Service initialisieren ===
|
||||||
try:
|
from services.langchain_xai_service import LangChainXAIService
|
||||||
langchain_service = LangChainXAIService(ctx)
|
langchain_service = LangChainXAIService(ctx)
|
||||||
except ValueError as e:
|
|
||||||
ctx.logger.error(f"❌ Service initialization failed: {e}")
|
|
||||||
return ApiResponse(
|
|
||||||
status=500,
|
|
||||||
body={'error': 'Service configuration error', 'details': str(e)}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create ChatXAI model
|
|
||||||
model = langchain_service.get_chat_model(
|
model = langchain_service.get_chat_model(
|
||||||
model=model_name,
|
model=model_name,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
max_tokens=max_tokens
|
max_tokens=max_tokens
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bind tools (file_search and/or web_search)
|
|
||||||
model_with_tools = langchain_service.bind_tools(
|
model_with_tools = langchain_service.bind_tools(
|
||||||
model=model,
|
model=model,
|
||||||
collection_id=collection_id,
|
collection_id=collection_id,
|
||||||
@@ -207,24 +113,76 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
|
|||||||
max_num_results=10
|
max_num_results=10
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate completion_id
|
|
||||||
completion_id = f"chatcmpl-{ctx.traceId[:12]}" if hasattr(ctx, 'traceId') else f"chatcmpl-{int(time.time())}"
|
completion_id = f"chatcmpl-{ctx.traceId[:12]}" if hasattr(ctx, 'traceId') else f"chatcmpl-{int(time.time())}"
|
||||||
created_ts = int(time.time())
|
created_ts = int(time.time())
|
||||||
|
|
||||||
# Branch: Streaming vs Non-Streaming
|
# ====================== ECHTES STREAMING ======================
|
||||||
if stream:
|
if stream:
|
||||||
ctx.logger.info("🌊 Starting streaming response...")
|
ctx.logger.info("🌊 Starting REAL SSE streaming (async generator)...")
|
||||||
return await handle_streaming_response(
|
|
||||||
model_with_tools=model_with_tools,
|
headers = {
|
||||||
messages=messages,
|
"Content-Type": "text/event-stream",
|
||||||
completion_id=completion_id,
|
"Cache-Control": "no-cache",
|
||||||
created_ts=created_ts,
|
"Connection": "keep-alive",
|
||||||
model_name=model_name,
|
"X-Accel-Buffering": "no", # nginx / proxies
|
||||||
langchain_service=langchain_service,
|
"Transfer-Encoding": "chunked",
|
||||||
ctx=ctx
|
}
|
||||||
|
|
||||||
|
async def sse_generator():
|
||||||
|
# Initial chunk (manche Clients brauchen das)
|
||||||
|
yield f'data: {json.dumps({"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model_name, "choices": [{"index": 0, "delta": {}, "finish_reason": None}]}, ensure_ascii=False)}\n\n'
|
||||||
|
|
||||||
|
chunk_count = 0
|
||||||
|
async for chunk in langchain_service.astream_chat(model_with_tools, messages):
|
||||||
|
delta = ""
|
||||||
|
if hasattr(chunk, "content"):
|
||||||
|
content = chunk.content
|
||||||
|
if isinstance(content, str):
|
||||||
|
delta = content
|
||||||
|
elif isinstance(content, list):
|
||||||
|
text_parts = [item.get('text', '') for item in content if isinstance(item, dict) and item.get('type') == 'text']
|
||||||
|
delta = ''.join(text_parts)
|
||||||
|
|
||||||
|
if delta:
|
||||||
|
chunk_count += 1
|
||||||
|
data = {
|
||||||
|
"id": completion_id,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": created_ts,
|
||||||
|
"model": model_name,
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {"content": delta},
|
||||||
|
"finish_reason": None
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
yield f'data: {json.dumps(data, ensure_ascii=False)}\n\n'
|
||||||
|
|
||||||
|
# Finish
|
||||||
|
finish = {
|
||||||
|
"id": completion_id,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": created_ts,
|
||||||
|
"model": model_name,
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {},
|
||||||
|
"finish_reason": "stop"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
yield f'data: {json.dumps(finish, ensure_ascii=False)}\n\n'
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
|
|
||||||
|
ctx.logger.info(f"✅ Streaming abgeschlossen – {chunk_count} Chunks gesendet")
|
||||||
|
|
||||||
|
return ApiResponse(
|
||||||
|
status=200,
|
||||||
|
headers=headers,
|
||||||
|
body=sse_generator() # ← async generator = echtes Streaming!
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ====================== NON-STREAMING (unverändert + optimiert) ======================
|
||||||
else:
|
else:
|
||||||
ctx.logger.info("📦 Starting non-streaming response...")
|
|
||||||
return await handle_non_streaming_response(
|
return await handle_non_streaming_response(
|
||||||
model_with_tools=model_with_tools,
|
model_with_tools=model_with_tools,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
@@ -236,19 +194,10 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
|
|||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.error("=" * 80)
|
ctx.logger.error(f"❌ ERROR: {e}", exc_info=True)
|
||||||
ctx.logger.error("❌ ERROR: AI CHAT COMPLETIONS API")
|
|
||||||
ctx.logger.error("=" * 80)
|
|
||||||
ctx.logger.error(f"Error: {e}", exc_info=True)
|
|
||||||
ctx.logger.error(f"Request body: {json.dumps(request.body, indent=2, ensure_ascii=False)}")
|
|
||||||
ctx.logger.error("=" * 80)
|
|
||||||
|
|
||||||
return ApiResponse(
|
return ApiResponse(
|
||||||
status=500,
|
status=500,
|
||||||
body={
|
body={'error': 'Internal server error', 'message': str(e)}
|
||||||
'error': 'Internal server error',
|
|
||||||
'message': str(e)
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -261,35 +210,31 @@ async def handle_non_streaming_response(
|
|||||||
langchain_service,
|
langchain_service,
|
||||||
ctx: FlowContext
|
ctx: FlowContext
|
||||||
) -> ApiResponse:
|
) -> ApiResponse:
|
||||||
"""
|
"""Non-Streaming Handler (optimiert)."""
|
||||||
Handle non-streaming chat completion.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ApiResponse with OpenAI-format JSON body
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Invoke model
|
|
||||||
result = await langchain_service.invoke_chat(model_with_tools, messages)
|
result = await langchain_service.invoke_chat(model_with_tools, messages)
|
||||||
|
|
||||||
# Extract content - handle both string and structured responses
|
# Content extrahieren (kompatibel mit xAI structured output)
|
||||||
if hasattr(result, 'content'):
|
if hasattr(result, 'content'):
|
||||||
raw_content = result.content
|
raw = result.content
|
||||||
|
if isinstance(raw, list):
|
||||||
# If content is a list (tool calls + text message), extract text
|
text_parts = [item.get('text', '') for item in raw if isinstance(item, dict) and item.get('type') == 'text']
|
||||||
if isinstance(raw_content, list):
|
content = ''.join(text_parts) or str(raw)
|
||||||
# Find the text message (usually last element with type='text')
|
|
||||||
text_messages = [
|
|
||||||
item.get('text', '')
|
|
||||||
for item in raw_content
|
|
||||||
if isinstance(item, dict) and item.get('type') == 'text'
|
|
||||||
]
|
|
||||||
content = text_messages[0] if text_messages else str(raw_content)
|
|
||||||
else:
|
else:
|
||||||
content = raw_content
|
content = raw
|
||||||
else:
|
else:
|
||||||
content = str(result)
|
content = str(result)
|
||||||
|
|
||||||
# Build OpenAI-compatible response
|
# Usage (falls verfügbar)
|
||||||
|
usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
|
||||||
|
if hasattr(result, 'usage_metadata'):
|
||||||
|
u = result.usage_metadata
|
||||||
|
usage = {
|
||||||
|
"prompt_tokens": getattr(u, 'input_tokens', 0),
|
||||||
|
"completion_tokens": getattr(u, 'output_tokens', 0),
|
||||||
|
"total_tokens": getattr(u, 'input_tokens', 0) + getattr(u, 'output_tokens', 0)
|
||||||
|
}
|
||||||
|
|
||||||
response_body = {
|
response_body = {
|
||||||
'id': completion_id,
|
'id': completion_id,
|
||||||
'object': 'chat.completion',
|
'object': 'chat.completion',
|
||||||
@@ -297,233 +242,40 @@ async def handle_non_streaming_response(
|
|||||||
'model': model_name,
|
'model': model_name,
|
||||||
'choices': [{
|
'choices': [{
|
||||||
'index': 0,
|
'index': 0,
|
||||||
'message': {
|
'message': {'role': 'assistant', 'content': content},
|
||||||
'role': 'assistant',
|
|
||||||
'content': content
|
|
||||||
},
|
|
||||||
'finish_reason': 'stop'
|
'finish_reason': 'stop'
|
||||||
}],
|
}],
|
||||||
'usage': {
|
'usage': usage
|
||||||
'prompt_tokens': 0, # LangChain doesn't expose token counts easily
|
|
||||||
'completion_tokens': 0,
|
|
||||||
'total_tokens': 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Log token usage (if available)
|
ctx.logger.info(f"✅ Non-streaming fertig – {len(content)} Zeichen")
|
||||||
if hasattr(result, 'usage_metadata'):
|
return ApiResponse(status=200, body=response_body)
|
||||||
usage = result.usage_metadata
|
|
||||||
prompt_tokens = getattr(usage, 'input_tokens', 0)
|
|
||||||
completion_tokens = getattr(usage, 'output_tokens', 0)
|
|
||||||
response_body['usage'] = {
|
|
||||||
'prompt_tokens': prompt_tokens,
|
|
||||||
'completion_tokens': completion_tokens,
|
|
||||||
'total_tokens': prompt_tokens + completion_tokens
|
|
||||||
}
|
|
||||||
ctx.logger.info(f"📊 Token Usage: prompt={prompt_tokens}, completion={completion_tokens}")
|
|
||||||
|
|
||||||
# Log citations if available (from tool response annotations)
|
|
||||||
if hasattr(result, 'content') and isinstance(result.content, list):
|
|
||||||
# Extract citations from structured response
|
|
||||||
for item in result.content:
|
|
||||||
if isinstance(item, dict) and item.get('type') == 'text':
|
|
||||||
annotations = item.get('annotations', [])
|
|
||||||
if annotations:
|
|
||||||
ctx.logger.info(f"🔗 Citations: {len(annotations)}")
|
|
||||||
for i, citation in enumerate(annotations[:10], 1): # Log first 10
|
|
||||||
url = citation.get('url', 'N/A')
|
|
||||||
title = citation.get('title', '')
|
|
||||||
if url.startswith('collections://'):
|
|
||||||
# Internal collection reference
|
|
||||||
ctx.logger.debug(f" [{i}] Collection Document: {title}")
|
|
||||||
else:
|
|
||||||
# External URL
|
|
||||||
ctx.logger.debug(f" [{i}] {url}")
|
|
||||||
|
|
||||||
# Log complete response content
|
|
||||||
ctx.logger.info(f"✅ Chat completion: {len(content)} chars")
|
|
||||||
ctx.logger.info("=" * 80)
|
|
||||||
ctx.logger.info("📝 COMPLETE RESPONSE:")
|
|
||||||
ctx.logger.info("-" * 80)
|
|
||||||
ctx.logger.info(content)
|
|
||||||
ctx.logger.info("-" * 80)
|
|
||||||
ctx.logger.info("=" * 80)
|
|
||||||
|
|
||||||
return ApiResponse(
|
|
||||||
status=200,
|
|
||||||
body=response_body
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.error(f"❌ Non-streaming completion failed: {e}", exc_info=True)
|
ctx.logger.error(f"❌ Non-streaming failed: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def handle_streaming_response(
|
async def lookup_collection_by_aktenzeichen(aktenzeichen: str, ctx: FlowContext) -> Optional[str]:
|
||||||
model_with_tools,
|
"""Aktenzeichen → Collection Lookup (unverändert)."""
|
||||||
messages: List[Dict[str, Any]],
|
|
||||||
completion_id: str,
|
|
||||||
created_ts: int,
|
|
||||||
model_name: str,
|
|
||||||
langchain_service,
|
|
||||||
ctx: FlowContext
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Handle streaming chat completion via SSE.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Streaming response generator
|
|
||||||
"""
|
|
||||||
async def stream_generator():
|
|
||||||
try:
|
|
||||||
# Set SSE headers
|
|
||||||
await ctx.response.status(200)
|
|
||||||
await ctx.response.headers({
|
|
||||||
"Content-Type": "text/event-stream",
|
|
||||||
"Cache-Control": "no-cache",
|
|
||||||
"Connection": "keep-alive"
|
|
||||||
})
|
|
||||||
|
|
||||||
ctx.logger.info("🌊 Streaming started")
|
|
||||||
|
|
||||||
# Stream chunks
|
|
||||||
chunk_count = 0
|
|
||||||
total_content = ""
|
|
||||||
|
|
||||||
async for chunk in langchain_service.astream_chat(model_with_tools, messages):
|
|
||||||
# Extract delta content - handle structured chunks
|
|
||||||
if hasattr(chunk, "content"):
|
|
||||||
chunk_content = chunk.content
|
|
||||||
|
|
||||||
# If chunk content is a list (tool calls), extract text parts
|
|
||||||
if isinstance(chunk_content, list):
|
|
||||||
# Accumulate only text deltas
|
|
||||||
text_parts = [
|
|
||||||
item.get('text', '')
|
|
||||||
for item in chunk_content
|
|
||||||
if isinstance(item, dict) and item.get('type') == 'text'
|
|
||||||
]
|
|
||||||
delta = ''.join(text_parts)
|
|
||||||
else:
|
|
||||||
delta = chunk_content
|
|
||||||
else:
|
|
||||||
delta = ""
|
|
||||||
|
|
||||||
if delta:
|
|
||||||
total_content += delta
|
|
||||||
chunk_count += 1
|
|
||||||
|
|
||||||
# Build SSE data
|
|
||||||
data = {
|
|
||||||
"id": completion_id,
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": created_ts,
|
|
||||||
"model": model_name,
|
|
||||||
"choices": [{
|
|
||||||
"index": 0,
|
|
||||||
"delta": {"content": delta},
|
|
||||||
"finish_reason": None
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Send SSE event
|
|
||||||
await ctx.response.stream(f"data: {json.dumps(data, ensure_ascii=False)}\n\n")
|
|
||||||
|
|
||||||
# Send finish event
|
|
||||||
finish_data = {
|
|
||||||
"id": completion_id,
|
|
||||||
"object": "chat.completion.chunk",
|
|
||||||
"created": created_ts,
|
|
||||||
"model": model_name,
|
|
||||||
"choices": [{
|
|
||||||
"index": 0,
|
|
||||||
"delta": {},
|
|
||||||
"finish_reason": "stop"
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
await ctx.response.stream(f"data: {json.dumps(finish_data)}\n\n")
|
|
||||||
|
|
||||||
# Send [DONE]
|
|
||||||
await ctx.response.stream("data: [DONE]\n\n")
|
|
||||||
|
|
||||||
# Close stream
|
|
||||||
await ctx.response.close()
|
|
||||||
|
|
||||||
# Log complete streamed response
|
|
||||||
ctx.logger.info(f"✅ Streaming completed: {chunk_count} chunks, {len(total_content)} chars")
|
|
||||||
ctx.logger.info("=" * 80)
|
|
||||||
ctx.logger.info("📝 COMPLETE STREAMED RESPONSE:")
|
|
||||||
ctx.logger.info("-" * 80)
|
|
||||||
ctx.logger.info(total_content)
|
|
||||||
ctx.logger.info("-" * 80)
|
|
||||||
ctx.logger.info("=" * 80)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
ctx.logger.error(f"❌ Streaming failed: {e}", exc_info=True)
|
|
||||||
|
|
||||||
# Send error event
|
|
||||||
error_data = {
|
|
||||||
"error": {
|
|
||||||
"message": str(e),
|
|
||||||
"type": "server_error"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
await ctx.response.stream(f"data: {json.dumps(error_data)}\n\n")
|
|
||||||
await ctx.response.close()
|
|
||||||
|
|
||||||
return stream_generator()
|
|
||||||
|
|
||||||
|
|
||||||
async def lookup_collection_by_aktenzeichen(
|
|
||||||
aktenzeichen: str,
|
|
||||||
ctx: FlowContext
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""
|
|
||||||
Lookup xAI Collection ID for Aktenzeichen via EspoCRM.
|
|
||||||
|
|
||||||
Search strategy:
|
|
||||||
1. Search for Raeumungsklage with matching advowareAkteBezeichner
|
|
||||||
2. Return xaiCollectionId if found
|
|
||||||
|
|
||||||
Args:
|
|
||||||
aktenzeichen: Normalized Aktenzeichen (e.g., "1234/56")
|
|
||||||
ctx: Motia context
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Collection ID or None if not found
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Initialize EspoCRM API
|
from services.espocrm import EspoCRMAPI
|
||||||
espocrm = EspoCRMAPI(ctx)
|
espocrm = EspoCRMAPI(ctx)
|
||||||
|
ctx.logger.info(f"🔍 Suche Räumungsklage für Aktenzeichen: {aktenzeichen}")
|
||||||
# Search Räumungsklage by advowareAkteBezeichner
|
|
||||||
ctx.logger.info(f"🔍 Searching Räumungsklage for Aktenzeichen: {aktenzeichen}")
|
|
||||||
|
|
||||||
search_result = await espocrm.search_entities(
|
search_result = await espocrm.search_entities(
|
||||||
entity_type='Raeumungsklage',
|
entity_type='Raeumungsklage',
|
||||||
where=[{
|
where=[{'type': 'equals', 'attribute': 'advowareAkteBezeichner', 'value': aktenzeichen}],
|
||||||
'type': 'equals',
|
select=['id', 'xaiCollectionId'],
|
||||||
'attribute': 'advowareAkteBezeichner',
|
|
||||||
'value': aktenzeichen
|
|
||||||
}],
|
|
||||||
select=['id', 'xaiCollectionId', 'advowareAkteBezeichner'],
|
|
||||||
maxSize=1
|
maxSize=1
|
||||||
)
|
)
|
||||||
|
|
||||||
if search_result and len(search_result) > 0:
|
if search_result and len(search_result) > 0:
|
||||||
entity = search_result[0]
|
collection_id = search_result[0].get('xaiCollectionId')
|
||||||
collection_id = entity.get('xaiCollectionId')
|
|
||||||
|
|
||||||
if collection_id:
|
if collection_id:
|
||||||
ctx.logger.info(f"✅ Found Räumungsklage: {entity.get('id')}")
|
ctx.logger.info(f"✅ Collection gefunden: {collection_id}")
|
||||||
return collection_id
|
return collection_id
|
||||||
else:
|
|
||||||
ctx.logger.warn(f"⚠️ Räumungsklage found but no xaiCollectionId: {entity.get('id')}")
|
|
||||||
else:
|
|
||||||
ctx.logger.warn(f"⚠️ No Räumungsklage found for {aktenzeichen}")
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.error(f"❌ Collection lookup failed: {e}", exc_info=True)
|
ctx.logger.error(f"❌ Lookup failed: {e}")
|
||||||
return None
|
return None
|
||||||
124
steps/ai/models_list_api_step.py
Normal file
124
steps/ai/models_list_api_step.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""AI Models List API
|
||||||
|
|
||||||
|
OpenAI-compatible models list endpoint for OpenWebUI and other clients.
|
||||||
|
Returns all available AI models that can be used with /ai/v1/chat/completions (also served at /v1/chat/completions).
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
from motia import FlowContext, http, ApiRequest, ApiResponse
|
||||||
|
|
||||||
|
|
||||||
|
# Step configuration for the models list endpoint. The same handler is
# registered for three GET routes: the versioned "/ai/v1/models", the bare
# OpenAI-style "/v1/models", and the unversioned "/ai/models".
config = dict(
    name="AI Models List API",
    description="OpenAI-compatible models endpoint - lists available AI models",
    flows=["ai-general"],
    triggers=[
        http("GET", route)
        for route in ("/ai/v1/models", "/v1/models", "/ai/models")
    ],
)
|
||||||
|
|
||||||
|
|
||||||
|
# Creation timestamp reported for every listed model. The OpenAI model object
# defines `created` as the time the model was created, so it must be stable
# across requests instead of echoing the current request time. The value is
# the one used in the documented example response below.
_MODELS_CREATED_TS = 1735689600

# Capability flags shared by every listed model.
_BASE_CAPABILITIES = {
    "file_search": True,
    "web_search": True,
    "streaming": True,
    "reasoning": True,
}

# Model id -> extra capability flags layered on top of _BASE_CAPABILITIES.
# Insertion order is the order in which models appear in the response.
_AVAILABLE_MODELS = {
    "grok-4.20-beta-0309-reasoning": {},
    "grok-4.20-multi-agent-beta-0309": {"multi_agent": True},
    "grok-4-1-fast-reasoning": {},
}


def _build_model_entry(model_id, extra_capabilities):
    """Return one OpenAI-style model object for ``model_id``."""
    return {
        "id": model_id,
        "object": "model",
        "created": _MODELS_CREATED_TS,
        "owned_by": "xai",
        "permission": [],
        "root": model_id,
        "parent": None,
        # Copy the shared flags so entries never alias the module-level dict.
        "capabilities": {**_BASE_CAPABILITIES, **extra_capabilities},
    }


async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
    """
    OpenAI-compatible models list endpoint.

    Returns the list of models that can be requested from the chat
    completions endpoints (/ai/v1/chat/completions, /v1/chat/completions),
    for OpenWebUI and other OpenAI-style clients.

    Args:
        request: Incoming HTTP request; its contents are not read.
        ctx: Flow context; only ``ctx.logger`` is used.

    Returns:
        ApiResponse with status 200 and a body of the form::

            {
              "object": "list",
              "data": [
                {
                  "id": "grok-4.20-beta-0309-reasoning",
                  "object": "model",
                  "created": 1735689600,
                  "owned_by": "xai",
                  "permission": [],
                  "root": "grok-4.20-beta-0309-reasoning",
                  "parent": null,
                  "capabilities": {...}
                }
              ]
            }

        If building the response raises, an ApiResponse with status 500 and
        an ``{"error": {"message": ..., "type": "server_error"}}`` body is
        returned instead.
    """
    ctx.logger.info("📋 Models list requested")

    try:
        models = [
            _build_model_entry(model_id, extra_capabilities)
            for model_id, extra_capabilities in _AVAILABLE_MODELS.items()
        ]

        # Envelope expected by OpenAI-compatible clients.
        response_body = {
            "object": "list",
            "data": models
        }

        ctx.logger.info(f"✅ Returned {len(models)} models")

        return ApiResponse(
            status=200,
            body=response_body
        )

    except Exception as e:
        # Top-level boundary of the endpoint: log with traceback and answer
        # with an OpenAI-style error object rather than propagating.
        ctx.logger.error(f"❌ Error listing models: {e}", exc_info=True)
        return ApiResponse(
            status=500,
            body={
                "error": {
                    "message": str(e),
                    "type": "server_error"
                }
            }
        )
|
||||||
523
steps/vmh/xai_chat_completion_api_step.py
Normal file
523
steps/vmh/xai_chat_completion_api_step.py
Normal file
@@ -0,0 +1,523 @@
|
|||||||
|
"""VMH xAI Chat Completions API

OpenAI-compatible Chat Completions API with an xAI/LangChain backend.
Supports file_search via xAI Collections (RAG).
"""
import json
import time
from typing import Any, Dict, List, Optional
from motia import FlowContext, http, ApiRequest, ApiResponse


# Step metadata: display name, description, flow membership and the HTTP
# trigger that routes requests to handler() below.
config = {
    "name": "VMH xAI Chat Completions API",
    "description": "OpenAI-compatible Chat Completions API with xAI LangChain backend",
    "flows": ["vmh-chat"],
    "triggers": [
        # Single trigger: POST on the VMH-prefixed chat completions path.
        http("POST", "/vmh/v1/chat/completions")
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
    """
    OpenAI-compatible Chat Completions endpoint.

    Request Body (OpenAI format):
        {
            "model": "grok-2-latest",
            "messages": [
                {"role": "system", "content": "You are helpful"},
                {"role": "user", "content": "1234/56 Was ist der Stand?"}
            ],
            "temperature": 0.7,
            "max_tokens": 2000,
            "stream": false,
            "extra_body": {
                "collection_id": "col_abc123",  // Optional: override auto-detection
                "enable_web_search": true,      // Optional: enable web search (default: false)
                "web_search_config": {          // Optional: web search configuration
                    "allowed_domains": ["example.com"],
                    "excluded_domains": ["spam.com"],
                    "enable_image_understanding": true
                }
            }
        }

    Collection resolution (priority):
        1. extra_body.collection_id (explicit override)
        2. First user message starts with an Aktenzeichen (e.g., "1234/56 ...");
           the matching collection is looked up and the Aktenzeichen is
           stripped from that message
        3. Error 400 if no collection_id was found AND web search is disabled

    Response (OpenAI format):
        Non-Streaming:
            {
                "id": "chatcmpl-...",
                "object": "chat.completion",
                "created": 1234567890,
                "model": "grok-2-latest",
                "choices": [{
                    "index": 0,
                    "message": {"role": "assistant", "content": "..."},
                    "finish_reason": "stop"
                }],
                "usage": {"prompt_tokens": X, "completion_tokens": Y, "total_tokens": Z}
            }

        Streaming (SSE):
            data: {"id":"chatcmpl-...","choices":[{"delta":{"content":"Hello"},...}]}
            data: {"id":"chatcmpl-...","choices":[{"delta":{"content":" world"},...}]}
            data: {"choices":[{"delta":{},"finish_reason":"stop"}]}
            data: [DONE]

    Args:
        request: Incoming HTTP request; ``request.body`` must be a JSON object.
        ctx: Flow context providing the logger (and ``traceId`` when present).

    Returns:
        ApiResponse with status 400 for malformed input, 500 for service or
        unexpected errors, otherwise whatever the streaming / non-streaming
        helper returns.
    """
    from services.langchain_xai_service import LangChainXAIService
    from services.aktenzeichen_utils import (
        extract_aktenzeichen,
        normalize_aktenzeichen,
        remove_aktenzeichen,
    )

    ctx.logger.info("=" * 80)
    ctx.logger.info("💬 VMH CHAT COMPLETIONS API")
    ctx.logger.info("=" * 80)

    try:
        # Parse request body
        body = request.body or {}

        if not isinstance(body, dict):
            ctx.logger.error(f"❌ Invalid request body type: {type(body)}")
            return ApiResponse(
                status=400,
                body={'error': 'Request body must be JSON object'}
            )

        # Extract parameters
        model_name = body.get('model', 'grok-4.20-beta-0309-reasoning')
        messages = body.get('messages', [])
        temperature = body.get('temperature', 0.7)
        max_tokens = body.get('max_tokens')
        stream = body.get('stream', False)

        # An explicit JSON null is treated like a missing extra_body; any other
        # non-object value is a client error rather than an internal one.
        extra_body = body.get('extra_body') or {}
        if not isinstance(extra_body, dict):
            ctx.logger.error(f"❌ Invalid extra_body type: {type(extra_body)}")
            return ApiResponse(
                status=400,
                body={'error': 'extra_body must be JSON object'}
            )

        # Validate messages BEFORE touching them, so malformed input yields a
        # 400 instead of crashing in the logging code below.
        if not messages or not isinstance(messages, list):
            ctx.logger.error("❌ Missing or invalid messages array")
            return ApiResponse(
                status=400,
                body={'error': 'messages must be non-empty array'}
            )

        if not all(isinstance(msg, dict) for msg in messages):
            ctx.logger.error("❌ Invalid message entry (expected JSON objects)")
            return ApiResponse(
                status=400,
                body={'error': 'each message must be JSON object'}
            )

        # Web Search parameters (default: disabled)
        enable_web_search = extra_body.get('enable_web_search', False)
        web_search_config = extra_body.get('web_search_config', {})

        ctx.logger.info(f"📋 Model: {model_name}")
        ctx.logger.info(f"📋 Messages: {len(messages)}")
        ctx.logger.info(f"📋 Stream: {stream}")
        ctx.logger.info(f"📋 Web Search: {'enabled' if enable_web_search else 'disabled'}")
        if enable_web_search and web_search_config:
            ctx.logger.debug(f"Web Search Config: {json.dumps(web_search_config, indent=2)}")

        # Log a short preview of every conversation message
        ctx.logger.info("-" * 80)
        ctx.logger.info("📨 REQUEST MESSAGES:")
        for i, msg in enumerate(messages, 1):
            role = msg.get('role', 'unknown')
            content = msg.get('content', '')
            # Content may be a list of parts or null; serialize it for the
            # preview instead of slicing/concatenating a non-string.
            if isinstance(content, str):
                text = content
            else:
                text = json.dumps(content, ensure_ascii=False, default=str)
            preview = text[:150] + "..." if len(text) > 150 else text
            ctx.logger.info(f"  [{i}] {role}: {preview}")
        ctx.logger.info("-" * 80)

        # Determine collection_id (Priority: extra_body > Aktenzeichen > error)
        collection_id: Optional[str] = None
        aktenzeichen: Optional[str] = None

        if 'collection_id' in extra_body:
            # Priority 1: Explicit collection_id in extra_body
            collection_id = extra_body['collection_id']
            ctx.logger.info(f"🔍 Collection ID from extra_body: {collection_id}")
        else:
            # Priority 2: Extract Aktenzeichen from the FIRST user message only
            first_user_msg = next(
                (msg for msg in messages if msg.get('role') == 'user'),
                None
            )
            content = first_user_msg.get('content', '') if first_user_msg else None

            # Only plain-text content is scanned for an Aktenzeichen.
            aktenzeichen_raw = extract_aktenzeichen(content) if isinstance(content, str) else None

            if aktenzeichen_raw:
                aktenzeichen = normalize_aktenzeichen(aktenzeichen_raw)
                ctx.logger.info(f"🔍 Aktenzeichen detected: {aktenzeichen}")

                # Lookup collection_id via EspoCRM
                collection_id = await lookup_collection_by_aktenzeichen(
                    aktenzeichen, ctx
                )

                if collection_id:
                    ctx.logger.info(f"✅ Collection found: {collection_id}")

                    # Remove Aktenzeichen from message (clean prompt)
                    first_user_msg['content'] = remove_aktenzeichen(content)
                    ctx.logger.debug(f"Cleaned message: {first_user_msg['content']}")
                else:
                    ctx.logger.warn(f"⚠️ No collection found for {aktenzeichen}")

        # Priority 3: Error if no collection_id AND web_search disabled
        if not collection_id and not enable_web_search:
            ctx.logger.error("❌ No collection_id found and web_search disabled")
            ctx.logger.error("   Provide collection_id, enable web_search, or both")
            return ApiResponse(
                status=400,
                body={
                    'error': 'collection_id or web_search required',
                    'message': 'Provide collection_id in extra_body, enable web_search, or start message with Aktenzeichen (e.g., "1234/56 question")'
                }
            )

        # Initialize LangChain xAI Service
        try:
            langchain_service = LangChainXAIService(ctx)
        except ValueError as e:
            ctx.logger.error(f"❌ Service initialization failed: {e}")
            return ApiResponse(
                status=500,
                body={'error': 'Service configuration error', 'details': str(e)}
            )

        # Create ChatXAI model
        model = langchain_service.get_chat_model(
            model=model_name,
            temperature=temperature,
            max_tokens=max_tokens
        )

        # Bind tools (file_search and/or web_search)
        model_with_tools = langchain_service.bind_tools(
            model=model,
            collection_id=collection_id,
            enable_web_search=enable_web_search,
            web_search_config=web_search_config,
            max_num_results=10
        )

        # Generate completion_id (trace id when available, else the timestamp)
        created_ts = int(time.time())
        if hasattr(ctx, 'traceId'):
            completion_id = f"chatcmpl-{ctx.traceId[:12]}"
        else:
            completion_id = f"chatcmpl-{created_ts}"

        # Branch: Streaming vs Non-Streaming (both helpers share one signature)
        if stream:
            ctx.logger.info("🌊 Starting streaming response...")
            responder = handle_streaming_response
        else:
            ctx.logger.info("📦 Starting non-streaming response...")
            responder = handle_non_streaming_response

        return await responder(
            model_with_tools=model_with_tools,
            messages=messages,
            completion_id=completion_id,
            created_ts=created_ts,
            model_name=model_name,
            langchain_service=langchain_service,
            ctx=ctx
        )

    except Exception as e:
        ctx.logger.error("=" * 80)
        ctx.logger.error("❌ ERROR: CHAT COMPLETIONS API")
        ctx.logger.error("=" * 80)
        ctx.logger.error(f"Error: {e}", exc_info=True)

        # Dumping the body must never raise, or it would replace the 500
        # response below with an unhandled exception.
        try:
            body_dump = json.dumps(request.body, indent=2, ensure_ascii=False, default=str)
        except (TypeError, ValueError):
            body_dump = repr(request.body)
        ctx.logger.error(f"Request body: {body_dump}")
        ctx.logger.error("=" * 80)

        return ApiResponse(
            status=500,
            body={
                'error': 'Internal server error',
                'message': str(e)
            }
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_text_content(raw_content: Any) -> Any:
    """Return the assistant text from a model result's ``content`` value.

    A non-list value is returned unchanged. For a list of structured parts,
    the ``text`` of every ``type == 'text'`` part is concatenated (the same
    rule the streaming handler applies to chunks); if the list has no text
    part at all, its string representation is returned.
    """
    if not isinstance(raw_content, list):
        return raw_content

    text_parts = [
        item.get('text') or ''
        for item in raw_content
        if isinstance(item, dict) and item.get('type') == 'text'
    ]
    return ''.join(text_parts) if text_parts else str(raw_content)


def _usage_token_count(usage: Any, key: str) -> int:
    """Read one token counter from usage metadata given as a mapping or an object."""
    if isinstance(usage, dict):
        value = usage.get(key, 0)
    else:
        value = getattr(usage, key, 0)
    return value or 0


async def handle_non_streaming_response(
    model_with_tools,
    messages: List[Dict[str, Any]],
    completion_id: str,
    created_ts: int,
    model_name: str,
    langchain_service,
    ctx: FlowContext
) -> ApiResponse:
    """
    Handle non-streaming chat completion.

    Args:
        model_with_tools: Chat model with tools already bound.
        messages: Conversation messages in OpenAI format.
        completion_id: Value for the response's ``id`` field.
        created_ts: Value for the response's ``created`` field.
        model_name: Value for the response's ``model`` field.
        langchain_service: Service object whose ``invoke_chat`` is awaited.
        ctx: Flow context providing the logger.

    Returns:
        ApiResponse (status 200) with OpenAI-format JSON body

    Raises:
        Exception: Any error is logged and re-raised to the caller.
    """
    try:
        # Invoke model
        result = await langchain_service.invoke_chat(model_with_tools, messages)

        # Extract content - handle both string and structured responses
        if hasattr(result, 'content'):
            content = _extract_text_content(result.content)
        else:
            content = str(result)

        # Build OpenAI-compatible response (usage defaults to zero counters)
        response_body = {
            'id': completion_id,
            'object': 'chat.completion',
            'created': created_ts,
            'model': model_name,
            'choices': [{
                'index': 0,
                'message': {
                    'role': 'assistant',
                    'content': content
                },
                'finish_reason': 'stop'
            }],
            'usage': {
                'prompt_tokens': 0,
                'completion_tokens': 0,
                'total_tokens': 0
            }
        }

        # Report token usage when the result carries it. The metadata may be a
        # mapping (attribute access would silently yield 0) or None.
        usage = getattr(result, 'usage_metadata', None)
        if usage:
            prompt_tokens = _usage_token_count(usage, 'input_tokens')
            completion_tokens = _usage_token_count(usage, 'output_tokens')
            response_body['usage'] = {
                'prompt_tokens': prompt_tokens,
                'completion_tokens': completion_tokens,
                'total_tokens': prompt_tokens + completion_tokens
            }
            ctx.logger.info(f"📊 Token Usage: prompt={prompt_tokens}, completion={completion_tokens}")

        # Log citations if available (annotations on structured text parts).
        # This is diagnostics only, so malformed annotations are skipped rather
        # than allowed to fail an otherwise successful completion.
        raw_content = getattr(result, 'content', None)
        if isinstance(raw_content, list):
            for item in raw_content:
                if not (isinstance(item, dict) and item.get('type') == 'text'):
                    continue

                annotations = item.get('annotations') or []
                if not annotations:
                    continue

                ctx.logger.info(f"🔗 Citations: {len(annotations)}")
                for i, citation in enumerate(annotations[:10], 1):  # Log first 10
                    if not isinstance(citation, dict):
                        continue
                    url = str(citation.get('url') or 'N/A')
                    title = citation.get('title', '')
                    if url.startswith('collections://'):
                        # Internal collection reference
                        ctx.logger.debug(f"  [{i}] Collection Document: {title}")
                    else:
                        # External URL
                        ctx.logger.debug(f"  [{i}] {url}")

        # Log complete response content
        ctx.logger.info(f"✅ Chat completion: {len(content)} chars")
        ctx.logger.info("=" * 80)
        ctx.logger.info("📝 COMPLETE RESPONSE:")
        ctx.logger.info("-" * 80)
        ctx.logger.info(content)
        ctx.logger.info("-" * 80)
        ctx.logger.info("=" * 80)

        return ApiResponse(
            status=200,
            body=response_body
        )

    except Exception as e:
        ctx.logger.error(f"❌ Non-streaming completion failed: {e}", exc_info=True)
        raise
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_streaming_response(
    model_with_tools,
    messages: List[Dict[str, Any]],
    completion_id: str,
    created_ts: int,
    model_name: str,
    langchain_service,
    ctx: FlowContext
):
    """
    Handle streaming chat completion via SSE.

    Each non-empty text delta from ``langchain_service.astream_chat`` is
    written to ``ctx.response`` as an OpenAI ``chat.completion.chunk`` event,
    followed by a final chunk with ``finish_reason == "stop"`` and the
    ``data: [DONE]`` sentinel. On failure a single error event is sent
    instead. The response is closed exactly once in every case.

    Args:
        model_with_tools: Chat model with tools already bound.
        messages: Conversation messages in OpenAI format.
        completion_id: Value for each chunk's ``id`` field.
        created_ts: Value for each chunk's ``created`` field.
        model_name: Value for each chunk's ``model`` field.
        langchain_service: Service object providing ``astream_chat``.
        ctx: Flow context providing ``logger`` and ``response``.

    Returns:
        The not-yet-awaited coroutine that performs the streaming.
        NOTE(review): nothing in this function awaits it - confirm that the
        caller/framework runs the returned coroutine.
    """

    def build_chunk(delta: Dict[str, Any], finish_reason: Optional[str]) -> Dict[str, Any]:
        # Shared envelope for content chunks and the final "stop" chunk.
        return {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
            "model": model_name,
            "choices": [{
                "index": 0,
                "delta": delta,
                "finish_reason": finish_reason
            }]
        }

    def extract_delta(chunk: Any) -> str:
        # Structured chunks (lists) contribute only their text parts; anything
        # that is not text yields an empty delta and is skipped by the caller.
        chunk_content = getattr(chunk, "content", "")
        if isinstance(chunk_content, list):
            return ''.join(
                item.get('text') or ''
                for item in chunk_content
                if isinstance(item, dict) and item.get('type') == 'text'
            )
        return chunk_content if isinstance(chunk_content, str) else ""

    async def send_event(payload: Dict[str, Any]) -> None:
        await ctx.response.stream(f"data: {json.dumps(payload, ensure_ascii=False)}\n\n")

    async def stream_generator():
        chunk_count = 0
        text_parts: List[str] = []

        try:
            # Set SSE headers
            await ctx.response.status(200)
            await ctx.response.headers({
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive"
            })

            ctx.logger.info("🌊 Streaming started")

            # Stream chunks
            async for chunk in langchain_service.astream_chat(model_with_tools, messages):
                delta = extract_delta(chunk)
                if not delta:
                    continue

                text_parts.append(delta)
                chunk_count += 1
                await send_event(build_chunk({"content": delta}, None))

            # Send finish event, then the [DONE] sentinel
            await send_event(build_chunk({}, "stop"))
            await ctx.response.stream("data: [DONE]\n\n")

        except Exception as e:
            ctx.logger.error(f"❌ Streaming failed: {e}", exc_info=True)

            # Best effort: the connection itself may be what failed, so a
            # second failure here must not prevent the close() below.
            try:
                await send_event({
                    "error": {
                        "message": str(e),
                        "type": "server_error"
                    }
                })
            except Exception as send_error:
                ctx.logger.error(f"❌ Could not deliver streaming error event: {send_error}")

        else:
            # Log complete streamed response
            total_content = "".join(text_parts)
            ctx.logger.info(f"✅ Streaming completed: {chunk_count} chunks, {len(total_content)} chars")
            ctx.logger.info("=" * 80)
            ctx.logger.info("📝 COMPLETE STREAMED RESPONSE:")
            ctx.logger.info("-" * 80)
            ctx.logger.info(total_content)
            ctx.logger.info("-" * 80)
            ctx.logger.info("=" * 80)

        finally:
            # Close stream exactly once, on success and on failure alike
            await ctx.response.close()

    return stream_generator()
|
||||||
|
|
||||||
|
|
||||||
|
async def lookup_collection_by_aktenzeichen(
    aktenzeichen: str,
    ctx: FlowContext
) -> Optional[str]:
    """
    Lookup xAI Collection ID for Aktenzeichen via EspoCRM.

    Search strategy:
        1. Search for Raeumungsklage with matching advowareAkteBezeichner
        2. Return xaiCollectionId if found

    Args:
        aktenzeichen: Normalized Aktenzeichen (e.g., "1234/56")
        ctx: Motia context

    Returns:
        Collection ID, or None if no entity matched, the matched entity has no
        xaiCollectionId, or the lookup failed (failures are logged, not raised)
    """
    try:
        # EspoCRMAPI is not imported at module level, so it has to be imported
        # here. Without this the name lookup raises NameError, which the
        # handler below swallows - every lookup would then report "not found".
        from services.espocrm import EspoCRMAPI

        # Initialize EspoCRM API
        espocrm = EspoCRMAPI(ctx)

        # Search Räumungsklage by advowareAkteBezeichner
        ctx.logger.info(f"🔍 Searching Räumungsklage for Aktenzeichen: {aktenzeichen}")

        search_result = await espocrm.search_entities(
            entity_type='Raeumungsklage',
            where=[{
                'type': 'equals',
                'attribute': 'advowareAkteBezeichner',
                'value': aktenzeichen
            }],
            select=['id', 'xaiCollectionId', 'advowareAkteBezeichner'],
            maxSize=1
        )

        if not search_result:
            ctx.logger.warn(f"⚠️ No Räumungsklage found for {aktenzeichen}")
            return None

        entity = search_result[0]
        collection_id = entity.get('xaiCollectionId')

        if not collection_id:
            ctx.logger.warn(f"⚠️ Räumungsklage found but no xaiCollectionId: {entity.get('id')}")
            return None

        ctx.logger.info(f"✅ Found Räumungsklage: {entity.get('id')}")
        return collection_id

    except Exception as e:
        # Best effort: a failed lookup is reported to the caller as "no collection".
        ctx.logger.error(f"❌ Collection lookup failed: {e}", exc_info=True)
        return None
|
||||||
Reference in New Issue
Block a user