diff --git a/steps/ai/__init__.py b/steps/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/steps/vmh/xai_chat_completion_api_step.py b/steps/ai/chat_completions_api_step.py similarity index 77% rename from steps/vmh/xai_chat_completion_api_step.py rename to steps/ai/chat_completions_api_step.py index 36d1e69..67256f1 100644 --- a/steps/vmh/xai_chat_completion_api_step.py +++ b/steps/ai/chat_completions_api_step.py @@ -1,7 +1,13 @@ -"""VMH xAI Chat Completions API +"""AI Chat Completions API -OpenAI-kompatible Chat Completions API mit xAI/LangChain Backend. -UnterstΓΌtzt file_search ΓΌber xAI Collections (RAG). +Universal OpenAI-compatible Chat Completions API with xAI/LangChain Backend. + +Features: +- File Search (RAG) via xAI Collections +- Web Search via xAI web_search tool +- Aktenzeichen-based automatic collection lookup +- Streaming & Non-Streaming support +- Multiple tools simultaneously (file_search + web_search) """ import json import time @@ -10,11 +16,11 @@ from motia import FlowContext, http, ApiRequest, ApiResponse config = { - "name": "VMH xAI Chat Completions API", - "description": "OpenAI-compatible Chat Completions API with xAI LangChain backend", - "flows": ["vmh-chat"], + "name": "AI Chat Completions API", + "description": "Universal OpenAI-compatible Chat Completions API with xAI backend, RAG, and web search", + "flows": ["ai-general"], "triggers": [ - http("POST", "/vmh/v1/chat/completions") + http("POST", "/ai/chat/completions") ], } @@ -25,7 +31,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: Request Body (OpenAI format): { - "model": "grok-2-latest", + "model": "grok-4.20-beta-0309-reasoning", "messages": [ {"role": "system", "content": "You are helpful"}, {"role": "user", "content": "1234/56 Was ist der Stand?"} @@ -47,7 +53,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: Aktenzeichen-Erkennung (Priority): 1. extra_body.collection_id (explicit override) 2. First user message starts with Aktenzeichen (e.g., "1234/56 ...") - 3. Error 400 if no collection_id found (strict mode) + 3. Web-only mode if no collection_id (must enable_web_search) Response (OpenAI format): Non-Streaming: @@ -55,7 +61,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: "id": "chatcmpl-...", "object": "chat.completion", "created": 1234567890, - "model": "grok-2-latest", + "model": "grok-4.20-beta-0309-reasoning", "choices": [{ "index": 0, "message": {"role": "assistant", "content": "..."}, @@ -75,7 +81,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: from services.espocrm import EspoCRMAPI ctx.logger.info("=" * 80) - ctx.logger.info("πŸ’¬ VMH CHAT COMPLETIONS API") + ctx.logger.info("πŸ€– AI CHAT COMPLETIONS API") ctx.logger.info("=" * 80) try: @@ -90,7 +96,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: ) # Extract parameters - model_name = body.get('model', 'grok-4-1-fast-reasoning') + model_name = body.get('model', 'grok-4.20-beta-0309-reasoning') messages = body.get('messages', []) temperature = body.get('temperature', 0.7) max_tokens = body.get('max_tokens') @@ -107,7 +113,16 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: ctx.logger.info(f"πŸ“‹ Web Search: {'enabled' if enable_web_search else 'disabled'}") if enable_web_search and web_search_config: ctx.logger.debug(f"Web Search Config: {json.dumps(web_search_config, indent=2)}") - ctx.logger.debug(f"Messages: {json.dumps(messages, indent=2, ensure_ascii=False)}") + + # Log full conversation messages + ctx.logger.info("-" * 80) + ctx.logger.info("πŸ“¨ REQUEST MESSAGES:") + for i, msg in enumerate(messages, 1): + role = msg.get('role', 'unknown') + content = msg.get('content', '') + preview = content[:150] + "..." if len(content) > 150 else content + ctx.logger.info(f" [{i}] {role}: {preview}") + ctx.logger.info("-" * 80) # Validate messages if not messages or not isinstance(messages, list): @@ -117,7 +132,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: body={'error': 'messages must be non-empty array'} ) - # Determine collection_id (Priority: extra_body > Aktenzeichen > error) + # Determine collection_id (Priority: extra_body > Aktenzeichen > optional for web-only) collection_id: Optional[str] = None aktenzeichen: Optional[str] = None @@ -222,7 +237,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse: except Exception as e: ctx.logger.error("=" * 80) - ctx.logger.error("❌ ERROR: CHAT COMPLETIONS API") + ctx.logger.error("❌ ERROR: AI CHAT COMPLETIONS API") ctx.logger.error("=" * 80) ctx.logger.error(f"Error: {e}", exc_info=True) ctx.logger.error(f"Request body: {json.dumps(request.body, indent=2, ensure_ascii=False)}") @@ -256,8 +271,23 @@ async def handle_non_streaming_response( # Invoke model result = await langchain_service.invoke_chat(model_with_tools, messages) - # Extract content - content = result.content if hasattr(result, 'content') else str(result) + # Extract content - handle both string and structured responses + if hasattr(result, 'content'): + raw_content = result.content + + # If content is a list (tool calls + text message), extract text + if isinstance(raw_content, list): + # Find the text message (usually last element with type='text') + text_messages = [ + item.get('text', '') + for item in raw_content + if isinstance(item, dict) and item.get('type') == 'text' + ] + content = text_messages[0] if text_messages else str(raw_content) + else: + content = raw_content + else: + content = str(result) # Build OpenAI-compatible response response_body = { @@ -292,8 +322,32 @@ async def handle_non_streaming_response( } ctx.logger.info(f"πŸ“Š Token Usage: prompt={prompt_tokens}, completion={completion_tokens}") + # Log citations if available (from tool response annotations) + if hasattr(result, 'content') and isinstance(result.content, list): + # Extract citations from structured response + for item in result.content: + if isinstance(item, dict) and item.get('type') == 'text': + annotations = item.get('annotations', []) + if annotations: + ctx.logger.info(f"πŸ”— Citations: {len(annotations)}") + for i, citation in enumerate(annotations[:10], 1): # Log first 10 + url = citation.get('url', 'N/A') + title = citation.get('title', '') + if url.startswith('collections://'): + # Internal collection reference + ctx.logger.debug(f" [{i}] Collection Document: {title}") + else: + # External URL + ctx.logger.debug(f" [{i}] {url}") + + # Log complete response content ctx.logger.info(f"βœ… Chat completion: {len(content)} chars") ctx.logger.info("=" * 80) + ctx.logger.info("πŸ“ COMPLETE RESPONSE:") + ctx.logger.info("-" * 80) + ctx.logger.info(content) + ctx.logger.info("-" * 80) + ctx.logger.info("=" * 80) return ApiResponse( status=200, @@ -337,8 +391,23 @@ async def handle_streaming_response( total_content = "" async for chunk in langchain_service.astream_chat(model_with_tools, messages): - # Extract delta content - delta = chunk.content if hasattr(chunk, "content") else "" + # Extract delta content - handle structured chunks + if hasattr(chunk, "content"): + chunk_content = chunk.content + + # If chunk content is a list (tool calls), extract text parts + if isinstance(chunk_content, list): + # Accumulate only text deltas + text_parts = [ + item.get('text', '') + for item in chunk_content + if isinstance(item, dict) and item.get('type') == 'text' + ] + delta = ''.join(text_parts) + else: + delta = chunk_content + else: + delta = "" if delta: total_content += delta @@ -380,8 +449,14 @@ async def handle_streaming_response( # Close stream await ctx.response.close() + # Log complete streamed response ctx.logger.info(f"βœ… Streaming completed: {chunk_count} chunks, {len(total_content)} chars") ctx.logger.info("=" * 80) + ctx.logger.info("πŸ“ COMPLETE STREAMED RESPONSE:") + ctx.logger.info("-" * 80) + ctx.logger.info(total_content) + ctx.logger.info("-" * 80) + ctx.logger.info("=" * 80) except Exception as e: ctx.logger.error(f"❌ Streaming failed: {e}", exc_info=True)