From 8f1533337cda6dc5f7698e59de18bbe1056c0e2c Mon Sep 17 00:00:00 2001 From: bsiggel Date: Thu, 12 Mar 2026 22:35:48 +0000 Subject: [PATCH] feat: Enhance AI Knowledge sync process with full sync mode and attachment handling --- services/aiknowledge_sync_utils.py | 25 +++++++++++++++++--- steps/vmh/aiknowledge_full_sync_cron_step.py | 7 ++---- steps/vmh/aiknowledge_sync_event_step.py | 4 +++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/services/aiknowledge_sync_utils.py b/services/aiknowledge_sync_utils.py index 7a7283e..c069a9c 100644 --- a/services/aiknowledge_sync_utils.py +++ b/services/aiknowledge_sync_utils.py @@ -227,7 +227,7 @@ class AIKnowledgeSync(BaseSyncUtils): }) # Sync documents - await self._sync_knowledge_documents(knowledge_id, collection_id, ctx) + await self._sync_knowledge_documents(knowledge_id, collection_id, ctx, full_sync=full_sync) else: ctx.logger.error(f"❌ Unknown activationStatus: {activation_status}") @@ -327,14 +327,33 @@ class AIKnowledgeSync(BaseSyncUtils): ctx.logger.info(f" 🔄 Syncing: {reason}") + # Get complete document entity with attachment info + doc_entity = await espocrm.get_entity('CDokumente', doc_id) + attachment_id = doc_entity.get('dokumentId') + + if not attachment_id: + ctx.logger.error(f" ❌ No attachment ID found for document {doc_id}") + failed += 1 + continue + + # Get attachment details for MIME type + try: + attachment = await espocrm.get_entity('Attachment', attachment_id) + mime_type = attachment.get('type', 'application/octet-stream') + file_size = attachment.get('size', 0) + except Exception as e: + ctx.logger.warn(f" ⚠️ Failed to get attachment details: {e}, using defaults") + mime_type = 'application/octet-stream' + file_size = 0 + + ctx.logger.info(f" 📎 Attachment: {attachment_id} ({mime_type}, {file_size} bytes)") + # Download document - attachment_id = doc.get('documentId') # TODO: Get correct attachment ID from CDokumente file_content = await espocrm.download_attachment(attachment_id) ctx.logger.info(f" 📥 Downloaded {len(file_content)} bytes") # Upload to XAI filename = doc_name - mime_type = 'application/octet-stream' # TODO: Get from attachment xai_file_id = await xai.upload_file(file_content, filename, mime_type) ctx.logger.info(f" 📤 Uploaded to XAI: {xai_file_id}") diff --git a/steps/vmh/aiknowledge_full_sync_cron_step.py b/steps/vmh/aiknowledge_full_sync_cron_step.py index 18450db..cbb1fe8 100644 --- a/steps/vmh/aiknowledge_full_sync_cron_step.py +++ b/steps/vmh/aiknowledge_full_sync_cron_step.py @@ -69,11 +69,8 @@ async def handler(input_data: None, ctx: FlowContext[Any]) -> None: 'topic': 'aiknowledge.sync', 'data': { 'knowledge_id': entity['id'], - 'source': 'daily_full_sync' - } - }) - - ctx.logger.info( + 'source': 'daily_full_sync', + 'full_sync': True # Enable Blake3 verification f"📤 [{i}/{total}] Enqueued: {entity['name']} " f"(syncStatus={entity.get('syncStatus')})" ) diff --git a/steps/vmh/aiknowledge_sync_event_step.py b/steps/vmh/aiknowledge_sync_event_step.py index f3ca95b..d1df249 100644 --- a/steps/vmh/aiknowledge_sync_event_step.py +++ b/steps/vmh/aiknowledge_sync_event_step.py @@ -36,6 +36,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None: # Extract data knowledge_id = event_data.get('knowledge_id') source = event_data.get('source', 'unknown') + full_sync = event_data.get('full_sync', False) # Blake3 verification mode if not knowledge_id: ctx.logger.error("❌ Missing knowledge_id in event data") @@ -43,6 +44,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None: ctx.logger.info(f"📋 Knowledge ID: {knowledge_id}") ctx.logger.info(f"📋 Source: {source}") + ctx.logger.info(f"📋 Full Sync Mode: {full_sync}") ctx.logger.info("=" * 80) # Get Redis for locking @@ -61,7 +63,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None: try: # Perform sync - await sync_utils.sync_knowledge_to_xai(knowledge_id, ctx) + await sync_utils.sync_knowledge_to_xai(knowledge_id, ctx, full_sync=full_sync) ctx.logger.info("=" * 80) ctx.logger.info("✅ AI KNOWLEDGE SYNC COMPLETED")