Refactor Akte and Document Sync Logic

- Removed the old VMH Document xAI Sync Handler implementation. - Introduced new xAI Upload Utilities for shared upload logic across sync flows. - Created a unified Akte sync structure with cron polling and event handling. - Implemented Akte Sync Cron Poller to manage pending Aktennummern with a debounce mechanism. - Developed Akte Sync Event Handler for synchronized processing across Advoware and xAI. - Enhanced logging and error handling throughout the new sync processes. - Ensured compatibility with existing Redis and EspoCRM services.
2026-03-26 01:23:16 +00:00
parent 86ec4db9db
commit b4d35b1790
10 changed files with 738 additions and 1805 deletions
--- a/services/aiknowledge_sync_utils.py
+++ b/services/aiknowledge_sync_utils.py
@@ -1,545 +0,0 @@
-"""
-AI Knowledge Sync Utilities
-
-Utility functions for synchronizing CAIKnowledge entities with XAI Collections:
- Collection lifecycle management (create, delete)
- Document synchronization with BLAKE3 hash verification
- Metadata-only updates via PATCH
- Orphan detection and cleanup
-"""
-
-import hashlib
-import json
-from typing import Dict, Any, Optional, List, Tuple
-from datetime import datetime
-from urllib.parse import unquote
-
-from services.sync_utils_base import BaseSyncUtils
-from services.models import (
-    AIKnowledgeActivationStatus,
-    AIKnowledgeSyncStatus,
-    JunctionSyncStatus
-)
-
-
-class AIKnowledgeSync(BaseSyncUtils):
-    """Utility class for AI Knowledge ↔ XAI Collections synchronization"""
-
-    def _get_lock_key(self, entity_id: str) -> str:
-        """Redis lock key for AI Knowledge entities"""
-        return f"sync_lock:aiknowledge:{entity_id}"
-
-    async def acquire_sync_lock(self, knowledge_id: str) -> bool:
-        """
-        Acquire distributed lock via Redis + update EspoCRM syncStatus.
-
-        Args:
-            knowledge_id: CAIKnowledge entity ID
-
-        Returns:
-            True if lock acquired, False if already locked
-        """
-        try:
-            # STEP 1: Atomic Redis lock
-            lock_key = self._get_lock_key(knowledge_id)
-            if not self._acquire_redis_lock(lock_key):
-                self._log(f"Redis lock already active for {knowledge_id}", level='warn')
-                return False
-
-            # STEP 2: Update syncStatus to pending_sync
-            try:
-                await self.espocrm.update_entity('CAIKnowledge', knowledge_id, {
-                    'syncStatus': AIKnowledgeSyncStatus.PENDING_SYNC.value
-                })
-            except Exception as e:
-                self._log(f"Could not set syncStatus: {e}", level='debug')
-
-            self._log(f"Sync lock acquired for {knowledge_id}")
-            return True
-
-        except Exception as e:
-            self._log(f"Error acquiring lock: {e}", level='error')
-            # Clean up Redis lock on error
-            lock_key = self._get_lock_key(knowledge_id)
-            self._release_redis_lock(lock_key)
-            return False
-
-    async def release_sync_lock(
-        self,
-        knowledge_id: str,
-        success: bool = True,
-        error_message: Optional[str] = None
-    ) -> None:
-        """
-        Release sync lock and set final status.
-
-        Args:
-            knowledge_id: CAIKnowledge entity ID
-            success: Whether sync succeeded
-            error_message: Optional error message
-        """
-        try:
-            update_data = {
-                'syncStatus': AIKnowledgeSyncStatus.SYNCED.value if success else AIKnowledgeSyncStatus.FAILED.value
-            }
-
-            if success:
-                update_data['lastSync'] = datetime.now().isoformat()
-                update_data['syncError'] = None
-            elif error_message:
-                update_data['syncError'] = error_message[:2000]
-
-            await self.espocrm.update_entity('CAIKnowledge', knowledge_id, update_data)
-
-            self._log(f"Sync lock released: {knowledge_id} → {'success' if success else 'failed'}")
-
-            # Release Redis lock
-            lock_key = self._get_lock_key(knowledge_id)
-            self._release_redis_lock(lock_key)
-
-        except Exception as e:
-            self._log(f"Error releasing lock: {e}", level='error')
-            # Ensure Redis lock is released
-            lock_key = self._get_lock_key(knowledge_id)
-            self._release_redis_lock(lock_key)
-
-    async def sync_knowledge_to_xai(self, knowledge_id: str, ctx) -> None:
-        """
-        Main sync orchestrator with activation status handling.
-
-        Args:
-            knowledge_id: CAIKnowledge entity ID
-            ctx: Motia context for logging
-        """
-        from services.espocrm import EspoCRMAPI
-        from services.xai_service import XAIService
-
-        espocrm = EspoCRMAPI(ctx)
-        xai = XAIService(ctx)
-
-        try:
-            # 1. Load knowledge entity
-            knowledge = await espocrm.get_entity('CAIKnowledge', knowledge_id)
-
-            activation_status = knowledge.get('aktivierungsstatus')
-            collection_id = knowledge.get('datenbankId')
-
-            ctx.logger.info("=" * 80)
-            ctx.logger.info(f"📋 Processing: {knowledge['name']}")
-            ctx.logger.info(f"   aktivierungsstatus: {activation_status}")
-            ctx.logger.info(f"   datenbankId: {collection_id or 'NONE'}")
-            ctx.logger.info("=" * 80)
-
-            # ═══════════════════════════════════════════════════════════
-            # CASE 1: NEW → Create Collection
-            # ═══════════════════════════════════════════════════════════
-            if activation_status == AIKnowledgeActivationStatus.NEW.value:
-                ctx.logger.info("🆕 Status 'new' → Creating XAI Collection")
-
-                collection = await xai.create_collection(
-                    name=knowledge['name'],
-                    metadata={
-                        'espocrm_entity_type': 'CAIKnowledge',
-                        'espocrm_entity_id': knowledge_id,
-                        'created_at': datetime.now().isoformat()
-                    }
-                )
-
-                # XAI API returns 'collection_id' not 'id'
-                collection_id = collection.get('collection_id') or collection.get('id')
-
-                # Update EspoCRM: Set datenbankId + change status to 'active'
-                await espocrm.update_entity('CAIKnowledge', knowledge_id, {
-                    'datenbankId': collection_id,
-                    'aktivierungsstatus': AIKnowledgeActivationStatus.ACTIVE.value,
-                    'syncStatus': AIKnowledgeSyncStatus.UNCLEAN.value
-                })
-
-                ctx.logger.info(f"✅ Collection created: {collection_id}")
-                ctx.logger.info("   Status changed to 'active', now syncing documents...")
-                
-                # Continue to document sync immediately (don't return)
-                # Fall through to sync logic below
-
-            # ═══════════════════════════════════════════════════════════
-            # CASE 2: DEACTIVATED → Delete Collection from XAI
-            # ═══════════════════════════════════════════════════════════
-            elif activation_status == AIKnowledgeActivationStatus.DEACTIVATED.value:
-                ctx.logger.info("🗑️  Status 'deactivated' → Deleting XAI Collection")
-
-                if collection_id:
-                    try:
-                        await xai.delete_collection(collection_id)
-                        ctx.logger.info(f"✅ Collection deleted from XAI: {collection_id}")
-                    except Exception as e:
-                        ctx.logger.error(f"❌ Failed to delete collection: {e}")
-                else:
-                    ctx.logger.info("⏭️  No collection ID, nothing to delete")
-
-                # Reset junction entries
-                documents = await espocrm.get_knowledge_documents_with_junction(knowledge_id)
-                
-                for doc in documents:
-                    doc_id = doc['documentId']
-                    try:
-                        await espocrm.update_knowledge_document_junction(
-                            knowledge_id,
-                            doc_id,
-                            {
-                                'syncstatus': 'new',
-                                'aiDocumentId': None
-                            },
-                            update_last_sync=False
-                        )
-                    except Exception as e:
-                        ctx.logger.warn(f"⚠️  Failed to reset junction for {doc_id}: {e}")
-
-                ctx.logger.info(f"✅ Deactivation complete, {len(documents)} junction entries reset")
-                return
-
-            # ═══════════════════════════════════════════════════════════
-            # CASE 3: PAUSED → Skip Sync
-            # ═══════════════════════════════════════════════════════════
-            elif activation_status == AIKnowledgeActivationStatus.PAUSED.value:
-                ctx.logger.info("⏸️  Status 'paused' → No sync performed")
-                return
-
-            # ═══════════════════════════════════════════════════════════
-            # CASE 4: ACTIVE → Normal Sync (or just created from NEW)
-            # ═══════════════════════════════════════════════════════════
-            if activation_status in (AIKnowledgeActivationStatus.ACTIVE.value, AIKnowledgeActivationStatus.NEW.value):
-                if not collection_id:
-                    ctx.logger.error("❌ Status 'active' but no datenbankId!")
-                    raise RuntimeError("Active knowledge without collection ID")
-
-                if activation_status == AIKnowledgeActivationStatus.ACTIVE.value:
-                    ctx.logger.info(f"🔄 Status 'active' → Syncing documents to {collection_id}")
-
-                # Verify collection exists
-                collection = await xai.get_collection(collection_id)
-                if not collection:
-                    ctx.logger.warn(f"⚠️  Collection {collection_id} not found, recreating")
-                    collection = await xai.create_collection(
-                        name=knowledge['name'],
-                        metadata={
-                            'espocrm_entity_type': 'CAIKnowledge',
-                            'espocrm_entity_id': knowledge_id
-                        }
-                    )
-                    collection_id = collection['id']
-                    await espocrm.update_entity('CAIKnowledge', knowledge_id, {
-                        'datenbankId': collection_id
-                    })
-
-                # Sync documents (both for ACTIVE status and after NEW → ACTIVE transition)
-                await self._sync_knowledge_documents(knowledge_id, collection_id, ctx)
-
-            elif activation_status not in (AIKnowledgeActivationStatus.DEACTIVATED.value, AIKnowledgeActivationStatus.PAUSED.value):
-                ctx.logger.error(f"❌ Unknown aktivierungsstatus: {activation_status}")
-                raise ValueError(f"Invalid aktivierungsstatus: {activation_status}")
-
-        finally:
-            await xai.close()
-
-    async def _sync_knowledge_documents(
-        self,
-        knowledge_id: str,
-        collection_id: str,
-        ctx
-    ) -> None:
-        """
-        Sync all documents of a knowledge base to XAI collection.
-        
-        Uses efficient JunctionData endpoint to get all documents with junction data
-        and blake3 hashes in a single API call. Hash comparison is always performed.
-
-        Args:
-            knowledge_id: CAIKnowledge entity ID
-            collection_id: XAI Collection ID
-            ctx: Motia context
-        """
-        from services.espocrm import EspoCRMAPI
-        from services.xai_service import XAIService
-
-        espocrm = EspoCRMAPI(ctx)
-        xai = XAIService(ctx)
-
-        # ═══════════════════════════════════════════════════════════════
-        # STEP 1: Load all documents with junction data (single API call)
-        # ═══════════════════════════════════════════════════════════════
-        ctx.logger.info(f"📥 Loading documents with junction data for knowledge {knowledge_id}")
-        
-        documents = await espocrm.get_knowledge_documents_with_junction(knowledge_id)
-        
-        ctx.logger.info(f"📊 Found {len(documents)} document(s)")
-
-        if not documents:
-            ctx.logger.info("✅ No documents to sync")
-            return
-
-        # ═══════════════════════════════════════════════════════════════
-        # STEP 2: Sync each document based on status/hash
-        # ═══════════════════════════════════════════════════════════════
-        successful = 0
-        failed = 0
-        skipped = 0        
-        # Track aiDocumentIds for orphan detection (collected during sync)
-        synced_file_ids: set = set()
-        for doc in documents:
-            doc_id = doc['documentId']
-            doc_name = doc.get('documentName', 'Unknown')
-            junction_status = doc.get('syncstatus', 'new')
-            ai_document_id = doc.get('aiDocumentId')
-            blake3_hash = doc.get('blake3hash')
-            
-            ctx.logger.info(f"\n📄 {doc_name} (ID: {doc_id})")
-            ctx.logger.info(f"   Status: {junction_status}")
-            ctx.logger.info(f"   aiDocumentId: {ai_document_id or 'N/A'}")
-            ctx.logger.info(f"   blake3hash: {blake3_hash[:16] if blake3_hash else 'N/A'}...")
-            
-            try:
-                # Decide if sync needed
-                needs_sync = False
-                reason = ""
-                
-                if junction_status in ['new', 'unclean', 'failed']:
-                    needs_sync = True
-                    reason = f"status={junction_status}"
-                elif junction_status == 'synced':
-                    # Synced status should have both blake3_hash and ai_document_id
-                    if not blake3_hash:
-                        needs_sync = True
-                        reason = "inconsistency: synced but no blake3 hash"
-                        ctx.logger.warn(f"   ⚠️  Synced document missing blake3 hash!")
-                    elif not ai_document_id:
-                        needs_sync = True
-                        reason = "inconsistency: synced but no aiDocumentId"
-                        ctx.logger.warn(f"   ⚠️  Synced document missing aiDocumentId!")
-                    else:
-                        # Verify Blake3 hash with XAI (always, since hash from JunctionData API is free)
-                        try:
-                            xai_doc_info = await xai.get_collection_document(collection_id, ai_document_id)
-                            if xai_doc_info:
-                                xai_blake3 = xai_doc_info.get('blake3_hash')
-                                
-                                if xai_blake3 != blake3_hash:
-                                    needs_sync = True
-                                    reason = f"blake3 mismatch (XAI: {xai_blake3[:16] if xai_blake3 else 'N/A'}... vs EspoCRM: {blake3_hash[:16]}...)"
-                                    ctx.logger.info(f"   🔄 Blake3 mismatch detected!")
-                                else:
-                                    ctx.logger.info(f"   ✅ Blake3 hash matches")
-                            else:
-                                needs_sync = True
-                                reason = "file not found in XAI collection"
-                                ctx.logger.warn(f"   ⚠️  Document marked synced but not in XAI!")
-                        except Exception as e:
-                            needs_sync = True
-                            reason = f"verification failed: {e}"
-                            ctx.logger.warn(f"   ⚠️  Failed to verify Blake3, will re-sync: {e}")
-                
-                if not needs_sync:
-                    ctx.logger.info(f"   ⏭️  Skipped (no sync needed)")
-                    # Document is already synced, track its aiDocumentId
-                    if ai_document_id:
-                        synced_file_ids.add(ai_document_id)
-                    skipped += 1
-                    continue
-                
-                ctx.logger.info(f"   🔄 Syncing: {reason}")
-                
-                # Get complete document entity with attachment info
-                doc_entity = await espocrm.get_entity('CDokumente', doc_id)
-                attachment_id = doc_entity.get('dokumentId')
-                
-                if not attachment_id:
-                    ctx.logger.error(f"   ❌ No attachment ID found for document {doc_id}")
-                    failed += 1
-                    continue
-                
-                # Get attachment details for MIME type and original filename
-                try:
-                    attachment = await espocrm.get_entity('Attachment', attachment_id)
-                    mime_type = attachment.get('type', 'application/octet-stream')
-                    file_size = attachment.get('size', 0)
-                    original_filename = attachment.get('name', doc_name)  # Original filename with extension
-                    # URL-decode filename (fixes special chars like §, ä, ö, ü, etc.)
-                    original_filename = unquote(original_filename)
-                except Exception as e:
-                    ctx.logger.warn(f"   ⚠️  Failed to get attachment details: {e}, using defaults")
-                    mime_type = 'application/octet-stream'
-                    file_size = 0
-                    original_filename = unquote(doc_name)  # Also decode fallback name
-                
-                ctx.logger.info(f"   📎 Attachment: {attachment_id} ({mime_type}, {file_size} bytes)")
-                ctx.logger.info(f"   📄 Original filename: {original_filename}")
-                
-                # Download document
-                file_content = await espocrm.download_attachment(attachment_id)
-                ctx.logger.info(f"   📥 Downloaded {len(file_content)} bytes")
-                
-                # Upload to XAI with original filename (includes extension)
-                filename = original_filename
-                
-                xai_file_id = await xai.upload_file(file_content, filename, mime_type)
-                ctx.logger.info(f"   📤 Uploaded to XAI: {xai_file_id}")
-                
-                # Add to collection
-                await xai.add_to_collection(collection_id, xai_file_id)
-                ctx.logger.info(f"   ✅ Added to collection {collection_id}")
-                
-                # Update junction
-                await espocrm.update_knowledge_document_junction(
-                    knowledge_id,
-                    doc_id,
-                    {
-                        'aiDocumentId': xai_file_id,
-                        'syncstatus': 'synced'
-                    },
-                    update_last_sync=True
-                )
-                ctx.logger.info(f"   ✅ Junction updated")
-                
-                # Track the new aiDocumentId for orphan detection
-                synced_file_ids.add(xai_file_id)
-                
-                successful += 1
-                
-            except Exception as e:
-                failed += 1
-                ctx.logger.error(f"   ❌ Sync failed: {e}")
-                
-                # Mark as failed in junction
-                try:
-                    await espocrm.update_knowledge_document_junction(
-                        knowledge_id,
-                        doc_id,
-                        {'syncstatus': 'failed'},
-                        update_last_sync=False
-                    )
-                except Exception as update_err:
-                    ctx.logger.error(f"   ❌ Failed to update junction status: {update_err}")
-
-        # ═══════════════════════════════════════════════════════════════
-        # STEP 3: Remove orphaned documents from XAI collection
-        # ═══════════════════════════════════════════════════════════════
-        try:
-            ctx.logger.info(f"\n🧹 Checking for orphaned documents in XAI collection...")
-            
-            # Get all files in XAI collection (normalized structure)
-            xai_documents = await xai.list_collection_documents(collection_id)
-            xai_file_ids = {doc.get('file_id') for doc in xai_documents if doc.get('file_id')}
-            
-            # Use synced_file_ids (collected during this sync) for orphan detection
-            # This includes both pre-existing synced docs and newly uploaded ones
-            ctx.logger.info(f"   XAI has {len(xai_file_ids)} files, we have {len(synced_file_ids)} synced")
-            
-            # Find orphans (in XAI but not in our current sync)
-            orphans = xai_file_ids - synced_file_ids
-            
-            if orphans:
-                ctx.logger.info(f"   Found {len(orphans)} orphaned file(s)")
-                for orphan_id in orphans:
-                    try:
-                        await xai.remove_from_collection(collection_id, orphan_id)
-                        ctx.logger.info(f"   🗑️  Removed {orphan_id}")
-                    except Exception as e:
-                        ctx.logger.warn(f"   ⚠️  Failed to remove {orphan_id}: {e}")
-            else:
-                ctx.logger.info(f"   ✅ No orphans found")
-                
-        except Exception as e:
-            ctx.logger.warn(f"⚠️  Failed to clean up orphans: {e}")
-
-        # ═══════════════════════════════════════════════════════════════
-        # STEP 4: Summary
-        # ═══════════════════════════════════════════════════════════════
-        ctx.logger.info("")
-        ctx.logger.info("=" * 80)
-        ctx.logger.info(f"📊 Sync Statistics:")
-        ctx.logger.info(f"   ✅ Synced: {successful}")
-        ctx.logger.info(f"   ⏭️  Skipped: {skipped}")
-        ctx.logger.info(f"   ❌ Failed: {failed}")
-        ctx.logger.info(f"   Mode: Blake3 hash verification enabled")
-        ctx.logger.info("=" * 80)
-
-    def _calculate_metadata_hash(self, document: Dict) -> str:
-        """
-        Calculate hash of sync-relevant metadata.
-
-        Args:
-            document: CDokumente entity
-
-        Returns:
-            MD5 hash (32 chars)
-        """
-        metadata = {
-            'name': document.get('name', ''),
-            'description': document.get('description', ''),
-        }
-
-        metadata_str = json.dumps(metadata, sort_keys=True)
-        return hashlib.md5(metadata_str.encode()).hexdigest()
-
-    def _build_xai_metadata(self, document: Dict) -> Dict[str, str]:
-        """
-        Build XAI metadata from CDokumente entity.
-
-        Args:
-            document: CDokumente entity
-
-        Returns:
-            Metadata dict for XAI
-        """
-        return {
-            'document_name': document.get('name', ''),
-            'description': document.get('description', ''),
-            'created_at': document.get('createdAt', ''),
-            'modified_at': document.get('modifiedAt', ''),
-            'espocrm_id': document.get('id', '')
-        }
-
-    async def _get_document_download_info(
-        self,
-        document: Dict,
-        ctx
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Get download info for CDokumente entity.
-
-        Args:
-            document: CDokumente entity
-            ctx: Motia context
-
-        Returns:
-            Dict with attachment_id, filename, mime_type
-        """
-        from services.espocrm import EspoCRMAPI
-
-        espocrm = EspoCRMAPI(ctx)
-
-        # Check for dokumentId (CDokumente custom field)
-        attachment_id = None
-        filename = None
-
-        if document.get('dokumentId'):
-            attachment_id = document.get('dokumentId')
-            filename = document.get('dokumentName')
-        elif document.get('fileId'):
-            attachment_id = document.get('fileId')
-            filename = document.get('fileName')
-
-        if not attachment_id:
-            ctx.logger.error(f"❌ No attachment ID for document {document['id']}")
-            return None
-
-        # Get attachment details
-        try:
-            attachment = await espocrm.get_entity('Attachment', attachment_id)
-            return {
-                'attachment_id': attachment_id,
-                'filename': filename or attachment.get('name', 'unknown'),
-                'mime_type': attachment.get('type', 'application/octet-stream')
-            }
-        except Exception as e:
-            ctx.logger.error(f"❌ Failed to get attachment {attachment_id}: {e}")
-            return None
--- a/services/xai_upload_utils.py
+++ b/services/xai_upload_utils.py
@@ -0,0 +1,201 @@
+"""
+xAI Upload Utilities
+
+Shared logic for uploading documents from EspoCRM to xAI Collections.
+Used by all sync flows (Advoware + direct xAI sync).
+
+Handles:
+- Blake3 hash-based change detection
+- Upload to xAI with correct filename/MIME
+- Collection management (create/verify)
+- EspoCRM metadata update after sync
+"""
+
+from typing import Optional, Dict, Any
+from datetime import datetime
+
+
+class XAIUploadUtils:
+    """
+    Stateless utility class for document upload operations to xAI.
+    
+    All methods take explicit service instances to remain reusable
+    across different sync contexts.
+    """
+
+    def __init__(self, ctx):
+        from services.logging_utils import get_service_logger
+        self._log = get_service_logger(__name__, ctx)
+
+    async def ensure_collection(
+        self,
+        akte: Dict[str, Any],
+        xai,
+        espocrm,
+    ) -> Optional[str]:
+        """
+        Ensure xAI collection exists for this Akte.
+        Creates one if missing, verifies it if present.
+
+        Returns:
+            collection_id or None on failure
+        """
+        akte_id = akte['id']
+        akte_name = akte.get('name', f"Akte {akte.get('aktennummer', akte_id)}")
+        collection_id = akte.get('aiCollectionId')
+
+        if collection_id:
+            # Verify it still exists in xAI
+            try:
+                col = await xai.get_collection(collection_id)
+                if col:
+                    self._log.debug(f"Collection {collection_id} verified for '{akte_name}'")
+                    return collection_id
+                self._log.warn(f"Collection {collection_id} not found in xAI, recreating...")
+            except Exception as e:
+                self._log.warn(f"Could not verify collection {collection_id}: {e}, recreating...")
+
+        # Create new collection
+        try:
+            self._log.info(f"Creating xAI collection for '{akte_name}'...")
+            col = await xai.create_collection(
+                name=akte_name,
+                metadata={
+                    'espocrm_entity_type': 'CAkten',
+                    'espocrm_entity_id': akte_id,
+                    'aktennummer': str(akte.get('aktennummer', '')),
+                }
+            )
+            collection_id = col['id']
+            self._log.info(f"✅ Collection created: {collection_id}")
+
+            # Save back to EspoCRM
+            await espocrm.update_entity('CAkten', akte_id, {
+                'aiCollectionId': collection_id,
+                'aiSyncStatus': 'unclean',  # Trigger full doc sync
+            })
+            return collection_id
+
+        except Exception as e:
+            self._log.error(f"❌ Failed to create xAI collection: {e}")
+            return None
+
+    async def sync_document_to_xai(
+        self,
+        doc: Dict[str, Any],
+        collection_id: str,
+        xai,
+        espocrm,
+    ) -> bool:
+        """
+        Sync a single CDokumente entity to xAI collection.
+
+        Decision logic (Blake3-based):
+        - aiSyncStatus in ['new', 'unclean', 'failed']    → always sync
+        - aiSyncStatus == 'synced' AND aiSyncHash == blake3hash → skip (no change)
+        - aiSyncStatus == 'synced' AND aiSyncHash != blake3hash → re-upload (changed)
+        - No attachment                                     → mark unsupported
+
+        Returns:
+            True if synced/skipped successfully, False on error
+        """
+        doc_id = doc['id']
+        doc_name = doc.get('name', doc_id)
+        ai_status = doc.get('aiSyncStatus', 'new')
+        ai_sync_hash = doc.get('aiSyncHash')
+        blake3_hash = doc.get('blake3hash')
+        ai_file_id = doc.get('aiFileId')
+
+        self._log.info(f"  📄 {doc_name}")
+        self._log.info(f"     aiSyncStatus={ai_status}, aiSyncHash={ai_sync_hash[:12] if ai_sync_hash else 'N/A'}..., blake3={blake3_hash[:12] if blake3_hash else 'N/A'}...")
+
+        # Skip if already synced and hash matches
+        if ai_status == 'synced' and ai_sync_hash and blake3_hash and ai_sync_hash == blake3_hash:
+            self._log.info(f"     ⏭️  Skipped (hash match, no change)")
+            return True
+
+        # Get attachment info
+        attachment_id = doc.get('dokumentId')
+        if not attachment_id:
+            self._log.warn(f"     ⚠️  No attachment (dokumentId missing) - marking unsupported")
+            await espocrm.update_entity('CDokumente', doc_id, {
+                'aiSyncStatus': 'unsupported',
+                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            })
+            return True  # Not an error, just unsupported
+
+        try:
+            # Download from EspoCRM
+            self._log.info(f"     📥 Downloading attachment {attachment_id}...")
+            file_content = await espocrm.download_attachment(attachment_id)
+            self._log.info(f"     Downloaded {len(file_content)} bytes")
+
+            # Determine filename + MIME type
+            filename = doc.get('dokumentName') or doc.get('name', 'document.bin')
+            from urllib.parse import unquote
+            filename = unquote(filename)
+
+            import mimetypes
+            mime_type, _ = mimetypes.guess_type(filename)
+            if not mime_type:
+                mime_type = 'application/octet-stream'
+
+            # Remove old file from collection if updating
+            if ai_file_id and ai_status != 'new':
+                try:
+                    await xai.remove_from_collection(collection_id, ai_file_id)
+                    self._log.info(f"     🗑️  Removed old xAI file {ai_file_id}")
+                except Exception:
+                    pass  # Non-fatal - may already be gone
+
+            # Upload to xAI
+            self._log.info(f"     📤 Uploading '{filename}' ({mime_type})...")
+            new_xai_file_id = await xai.upload_file(file_content, filename, mime_type)
+            self._log.info(f"     Uploaded: xai_file_id={new_xai_file_id}")
+
+            # Add to collection
+            await xai.add_to_collection(collection_id, new_xai_file_id)
+            self._log.info(f"     ✅ Added to collection {collection_id}")
+
+            # Update CDokumente with sync result
+            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            await espocrm.update_entity('CDokumente', doc_id, {
+                'aiFileId': new_xai_file_id,
+                'aiCollectionId': collection_id,
+                'aiSyncHash': blake3_hash or doc.get('syncedHash'),
+                'aiSyncStatus': 'synced',
+                'aiLastSync': now,
+            })
+            self._log.info(f"     ✅ EspoCRM updated")
+            return True
+
+        except Exception as e:
+            self._log.error(f"     ❌ Failed: {e}")
+            await espocrm.update_entity('CDokumente', doc_id, {
+                'aiSyncStatus': 'failed',
+                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            })
+            return False
+
+    async def remove_document_from_xai(
+        self,
+        doc: Dict[str, Any],
+        collection_id: str,
+        xai,
+        espocrm,
+    ) -> None:
+        """Remove a CDokumente from its xAI collection (called on DELETE)."""
+        doc_id = doc['id']
+        ai_file_id = doc.get('aiFileId')
+        if not ai_file_id:
+            return
+        try:
+            await xai.remove_from_collection(collection_id, ai_file_id)
+            self._log.info(f"  🗑️  Removed {doc.get('name')} from xAI collection")
+            await espocrm.update_entity('CDokumente', doc_id, {
+                'aiFileId': None,
+                'aiSyncStatus': 'new',
+                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            })
+        except Exception as e:
+            self._log.warn(f"  ⚠️  Could not remove from xAI: {e}")