"""VMH xAI Chat Completions API OpenAI-kompatible Chat Completions API mit xAI/LangChain Backend. Unterstützt file_search über xAI Collections (RAG). """ import json import time from typing import Any, Dict, List, Optional from motia import FlowContext, http, ApiRequest, ApiResponse config = { "name": "VMH xAI Chat Completions API", "description": "OpenAI-compatible Chat Completions API with xAI LangChain backend", "flows": ["vmh-chat"], "triggers": [ http("POST", "/vmh/v1/chat/completions") ], } async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: """ OpenAI-compatible Chat Completions endpoint. Request Body (OpenAI format): { "model": "grok-2-latest", "messages": [ {"role": "system", "content": "You are helpful"}, {"role": "user", "content": "1234/56 Was ist der Stand?"} ], "temperature": 0.7, "max_tokens": 2000, "stream": false, "extra_body": { "collection_id": "col_abc123" // Optional: override auto-detection } } Aktenzeichen-Erkennung (Priority): 1. extra_body.collection_id (explicit override) 2. First user message starts with Aktenzeichen (e.g., "1234/56 ...") 3. Error 400 if no collection_id found (strict mode) Response (OpenAI format): Non-Streaming: { "id": "chatcmpl-...", "object": "chat.completion", "created": 1234567890, "model": "grok-2-latest", "choices": [{ "index": 0, "message": {"role": "assistant", "content": "..."}, "finish_reason": "stop" }], "usage": {"prompt_tokens": X, "completion_tokens": Y, "total_tokens": Z} } Streaming (SSE): data: {"id":"chatcmpl-...","choices":[{"delta":{"content":"Hello"},...}]} data: {"id":"chatcmpl-...","choices":[{"delta":{"content":" world"},...}]} data: {"choices":[{"delta":{},"finish_reason":"stop"}]} data: [DONE] """ from services.langchain_xai_service import LangChainXAIService from services.aktenzeichen_utils import extract_aktenzeichen, normalize_aktenzeichen from services.espocrm import EspoCRMAPI ctx.logger.info("=" * 80) ctx.logger.info("💬 VMH CHAT COMPLETIONS API") ctx.logger.info("=" * 80) try: # Parse request body body = request.body or {} if not isinstance(body, dict): ctx.logger.error(f"❌ Invalid request body type: {type(body)}") return ApiResponse( status=400, body={'error': 'Request body must be JSON object'} ) # Extract parameters model_name = body.get('model', 'grok-4-1-fast-reasoning') messages = body.get('messages', []) temperature = body.get('temperature', 0.7) max_tokens = body.get('max_tokens') stream = body.get('stream', False) extra_body = body.get('extra_body', {}) ctx.logger.info(f"📋 Model: {model_name}") ctx.logger.info(f"📋 Messages: {len(messages)}") ctx.logger.info(f"📋 Stream: {stream}") ctx.logger.debug(f"Messages: {json.dumps(messages, indent=2, ensure_ascii=False)}") # Validate messages if not messages or not isinstance(messages, list): ctx.logger.error("❌ Missing or invalid messages array") return ApiResponse( status=400, body={'error': 'messages must be non-empty array'} ) # Determine collection_id (Priority: extra_body > Aktenzeichen > error) collection_id: Optional[str] = None aktenzeichen: Optional[str] = None # Priority 1: Explicit collection_id in extra_body if 'collection_id' in extra_body: collection_id = extra_body['collection_id'] ctx.logger.info(f"🔍 Collection ID from extra_body: {collection_id}") # Priority 2: Extract Aktenzeichen from first user message else: for msg in messages: if msg.get('role') == 'user': content = msg.get('content', '') aktenzeichen_raw = extract_aktenzeichen(content) if aktenzeichen_raw: aktenzeichen = normalize_aktenzeichen(aktenzeichen_raw) ctx.logger.info(f"🔍 Aktenzeichen detected: {aktenzeichen}") # Lookup collection_id via EspoCRM collection_id = await lookup_collection_by_aktenzeichen( aktenzeichen, ctx ) if collection_id: ctx.logger.info(f"✅ Collection found: {collection_id}") # Remove Aktenzeichen from message (clean prompt) from services.aktenzeichen_utils import remove_aktenzeichen msg['content'] = remove_aktenzeichen(content) ctx.logger.debug(f"Cleaned message: {msg['content']}") else: ctx.logger.warn(f"⚠️ No collection found for {aktenzeichen}") break # Only check first user message # Priority 3: Error if no collection_id (strict mode) if not collection_id: ctx.logger.error("❌ No collection_id found (neither extra_body nor Aktenzeichen)") ctx.logger.error(" Provide collection_id in extra_body or start message with Aktenzeichen") return ApiResponse( status=400, body={ 'error': 'collection_id required', 'message': 'Provide collection_id in extra_body or start message with Aktenzeichen (e.g., "1234/56 question")' } ) # Initialize LangChain xAI Service try: langchain_service = LangChainXAIService(ctx) except ValueError as e: ctx.logger.error(f"❌ Service initialization failed: {e}") return ApiResponse( status=500, body={'error': 'Service configuration error', 'details': str(e)} ) # Create ChatXAI model model = langchain_service.get_chat_model( model=model_name, temperature=temperature, max_tokens=max_tokens ) # Bind file_search tool model_with_tools = langchain_service.bind_file_search( model=model, collection_id=collection_id, max_num_results=10 ) # Generate completion_id completion_id = f"chatcmpl-{ctx.traceId[:12]}" if hasattr(ctx, 'traceId') else f"chatcmpl-{int(time.time())}" created_ts = int(time.time()) # Branch: Streaming vs Non-Streaming if stream: ctx.logger.info("🌊 Starting streaming response...") return await handle_streaming_response( model_with_tools=model_with_tools, messages=messages, completion_id=completion_id, created_ts=created_ts, model_name=model_name, langchain_service=langchain_service, ctx=ctx ) else: ctx.logger.info("📦 Starting non-streaming response...") return await handle_non_streaming_response( model_with_tools=model_with_tools, messages=messages, completion_id=completion_id, created_ts=created_ts, model_name=model_name, langchain_service=langchain_service, ctx=ctx ) except Exception as e: ctx.logger.error("=" * 80) ctx.logger.error("❌ ERROR: CHAT COMPLETIONS API") ctx.logger.error("=" * 80) ctx.logger.error(f"Error: {e}", exc_info=True) ctx.logger.error(f"Request body: {json.dumps(request.body, indent=2, ensure_ascii=False)}") ctx.logger.error("=" * 80) return ApiResponse( status=500, body={ 'error': 'Internal server error', 'message': str(e) } ) async def handle_non_streaming_response( model_with_tools, messages: List[Dict[str, Any]], completion_id: str, created_ts: int, model_name: str, langchain_service, ctx: FlowContext ) -> ApiResponse: """ Handle non-streaming chat completion. Returns: ApiResponse with OpenAI-format JSON body """ try: # Invoke model result = await langchain_service.invoke_chat(model_with_tools, messages) # Extract content content = result.content if hasattr(result, 'content') else str(result) # Build OpenAI-compatible response response_body = { 'id': completion_id, 'object': 'chat.completion', 'created': created_ts, 'model': model_name, 'choices': [{ 'index': 0, 'message': { 'role': 'assistant', 'content': content }, 'finish_reason': 'stop' }], 'usage': { 'prompt_tokens': 0, # LangChain doesn't expose token counts easily 'completion_tokens': 0, 'total_tokens': 0 } } # Log token usage (if available) if hasattr(result, 'usage_metadata'): usage = result.usage_metadata prompt_tokens = getattr(usage, 'input_tokens', 0) completion_tokens = getattr(usage, 'output_tokens', 0) response_body['usage'] = { 'prompt_tokens': prompt_tokens, 'completion_tokens': completion_tokens, 'total_tokens': prompt_tokens + completion_tokens } ctx.logger.info(f"📊 Token Usage: prompt={prompt_tokens}, completion={completion_tokens}") ctx.logger.info(f"✅ Chat completion: {len(content)} chars") ctx.logger.info("=" * 80) return ApiResponse( status=200, body=response_body ) except Exception as e: ctx.logger.error(f"❌ Non-streaming completion failed: {e}", exc_info=True) raise async def handle_streaming_response( model_with_tools, messages: List[Dict[str, Any]], completion_id: str, created_ts: int, model_name: str, langchain_service, ctx: FlowContext ): """ Handle streaming chat completion via SSE. Returns: Streaming response generator """ async def stream_generator(): try: # Set SSE headers await ctx.response.status(200) await ctx.response.headers({ "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" }) ctx.logger.info("🌊 Streaming started") # Stream chunks chunk_count = 0 total_content = "" async for chunk in langchain_service.astream_chat(model_with_tools, messages): # Extract delta content delta = chunk.content if hasattr(chunk, "content") else "" if delta: total_content += delta chunk_count += 1 # Build SSE data data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model_name, "choices": [{ "index": 0, "delta": {"content": delta}, "finish_reason": None }] } # Send SSE event await ctx.response.stream(f"data: {json.dumps(data, ensure_ascii=False)}\n\n") # Send finish event finish_data = { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model_name, "choices": [{ "index": 0, "delta": {}, "finish_reason": "stop" }] } await ctx.response.stream(f"data: {json.dumps(finish_data)}\n\n") # Send [DONE] await ctx.response.stream("data: [DONE]\n\n") # Close stream await ctx.response.close() ctx.logger.info(f"✅ Streaming completed: {chunk_count} chunks, {len(total_content)} chars") ctx.logger.info("=" * 80) except Exception as e: ctx.logger.error(f"❌ Streaming failed: {e}", exc_info=True) # Send error event error_data = { "error": { "message": str(e), "type": "server_error" } } await ctx.response.stream(f"data: {json.dumps(error_data)}\n\n") await ctx.response.close() return stream_generator() async def lookup_collection_by_aktenzeichen( aktenzeichen: str, ctx: FlowContext ) -> Optional[str]: """ Lookup xAI Collection ID for Aktenzeichen via EspoCRM. Search strategy: 1. Search for Raeumungsklage with matching advowareAkteBezeichner 2. Return xaiCollectionId if found Args: aktenzeichen: Normalized Aktenzeichen (e.g., "1234/56") ctx: Motia context Returns: Collection ID or None if not found """ try: # Initialize EspoCRM API espocrm = EspoCRMAPI(ctx) # Search Räumungsklage by advowareAkteBezeichner ctx.logger.info(f"🔍 Searching Räumungsklage for Aktenzeichen: {aktenzeichen}") search_result = await espocrm.search_entities( entity_type='Raeumungsklage', where=[{ 'type': 'equals', 'attribute': 'advowareAkteBezeichner', 'value': aktenzeichen }], select=['id', 'xaiCollectionId', 'advowareAkteBezeichner'], maxSize=1 ) if search_result and len(search_result) > 0: entity = search_result[0] collection_id = entity.get('xaiCollectionId') if collection_id: ctx.logger.info(f"✅ Found Räumungsklage: {entity.get('id')}") return collection_id else: ctx.logger.warn(f"⚠️ Räumungsklage found but no xaiCollectionId: {entity.get('id')}") else: ctx.logger.warn(f"⚠️ No Räumungsklage found for {aktenzeichen}") return None except Exception as e: ctx.logger.error(f"❌ Collection lookup failed: {e}", exc_info=True) return None