""" xAI Upload Utilities Shared logic for uploading documents from EspoCRM to xAI Collections. Used by all sync flows (Advoware + direct xAI sync). Handles: - Blake3 hash-based change detection - Upload to xAI with correct filename/MIME - Collection management (create/verify) - EspoCRM metadata update after sync """ from typing import Optional, Dict, Any from datetime import datetime class XAIUploadUtils: """ Stateless utility class for document upload operations to xAI. All methods take explicit service instances to remain reusable across different sync contexts. """ def __init__(self, ctx): from services.logging_utils import get_service_logger self._log = get_service_logger(__name__, ctx) async def ensure_collection( self, akte: Dict[str, Any], xai, espocrm, ) -> Optional[str]: """ Ensure xAI collection exists for this Akte. Creates one if missing, verifies it if present. Returns: collection_id or None on failure """ akte_id = akte['id'] akte_name = akte.get('name', f"Akte {akte.get('aktennummer', akte_id)}") collection_id = akte.get('aiCollectionId') if collection_id: # Verify it still exists in xAI try: col = await xai.get_collection(collection_id) if col: self._log.debug(f"Collection {collection_id} verified for '{akte_name}'") return collection_id self._log.warn(f"Collection {collection_id} not found in xAI, recreating...") except Exception as e: self._log.warn(f"Could not verify collection {collection_id}: {e}, recreating...") # Create new collection try: self._log.info(f"Creating xAI collection for '{akte_name}'...") col = await xai.create_collection( name=akte_name, ) collection_id = col.get('collection_id') or col.get('id') self._log.info(f"✅ Collection created: {collection_id}") # Save back to EspoCRM await espocrm.update_entity('CAkten', akte_id, { 'aiCollectionId': collection_id, 'aiSyncStatus': 'unclean', # Trigger full doc sync }) return collection_id except Exception as e: self._log.error(f"❌ Failed to create xAI collection: {e}") return None async def sync_document_to_xai( self, doc: Dict[str, Any], collection_id: str, xai, espocrm, ) -> bool: """ Sync a single CDokumente entity to xAI collection. Decision logic (Blake3-based): - aiSyncStatus in ['new', 'unclean', 'failed'] → always sync - aiSyncStatus == 'synced' AND aiSyncHash == blake3hash → skip (no change) - aiSyncStatus == 'synced' AND aiSyncHash != blake3hash → re-upload (changed) - No attachment → mark unsupported Returns: True if synced/skipped successfully, False on error """ doc_id = doc['id'] doc_name = doc.get('name', doc_id) ai_status = doc.get('aiSyncStatus', 'new') ai_sync_hash = doc.get('aiSyncHash') blake3_hash = doc.get('blake3hash') ai_file_id = doc.get('aiFileId') self._log.info(f" 📄 {doc_name}") self._log.info(f" aiSyncStatus={ai_status}, aiSyncHash={ai_sync_hash[:12] if ai_sync_hash else 'N/A'}..., blake3={blake3_hash[:12] if blake3_hash else 'N/A'}...") # File content unchanged (hash match) → kein Re-Upload nötig if ai_status == 'synced' and ai_sync_hash and blake3_hash and ai_sync_hash == blake3_hash: if ai_file_id: self._log.info(f" ✅ Unverändert – kein Re-Upload (hash match)") else: self._log.info(f" ⏭️ Skipped (hash match, kein aiFileId)") return True # Get attachment info attachment_id = doc.get('dokumentId') if not attachment_id: self._log.warn(f" ⚠️ No attachment (dokumentId missing) - marking unsupported") await espocrm.update_entity('CDokumente', doc_id, { 'aiSyncStatus': 'unsupported', 'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), }) return True # Not an error, just unsupported try: # Download from EspoCRM self._log.info(f" 📥 Downloading attachment {attachment_id}...") file_content = await espocrm.download_attachment(attachment_id) self._log.info(f" Downloaded {len(file_content)} bytes") # Determine filename + MIME type filename = doc.get('dokumentName') or doc.get('name', 'document.bin') from urllib.parse import unquote filename = unquote(filename) import mimetypes mime_type, _ = mimetypes.guess_type(filename) if not mime_type: mime_type = 'application/octet-stream' # Remove old file from collection if updating if ai_file_id and ai_status != 'new': try: await xai.remove_from_collection(collection_id, ai_file_id) self._log.info(f" 🗑️ Removed old xAI file {ai_file_id}") except Exception: pass # Non-fatal - may already be gone # Build metadata fields – werden einmalig beim Upload gesetzt; # Custom fields können nachträglich NICHT aktualisiert werden. # xAI erlaubt KEINE leeren Strings als Feldwerte → nur befüllte Felder senden. fields_raw = { 'document_name': doc.get('name', filename), 'description': str(doc.get('beschreibung', '') or ''), 'advoware_art': str(doc.get('advowareArt', '') or ''), 'advoware_bemerkung': str(doc.get('advowareBemerkung', '') or ''), 'espocrm_id': doc['id'], 'created_at': str(doc.get('createdAt', '') or ''), 'modified_at': str(doc.get('modifiedAt', '') or ''), } fields = {k: v for k, v in fields_raw.items() if v} # Single-request upload directly to collection incl. metadata fields self._log.info(f" 📤 Uploading '{filename}' ({mime_type}) with metadata...") new_xai_file_id = await xai.upload_to_collection( collection_id, file_content, filename, mime_type, fields=fields ) self._log.info(f" ✅ Uploaded + metadata set: {new_xai_file_id}") # Update CDokumente with sync result now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') await espocrm.update_entity('CDokumente', doc_id, { 'aiFileId': new_xai_file_id, 'aiCollectionId': collection_id, 'aiSyncHash': blake3_hash or doc.get('syncedHash'), 'aiSyncStatus': 'synced', 'aiLastSync': now, }) self._log.info(f" ✅ EspoCRM updated") return True except Exception as e: self._log.error(f" ❌ Failed: {e}") await espocrm.update_entity('CDokumente', doc_id, { 'aiSyncStatus': 'failed', 'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), }) return False async def remove_document_from_xai( self, doc: Dict[str, Any], collection_id: str, xai, espocrm, ) -> None: """Remove a CDokumente from its xAI collection (called on DELETE).""" doc_id = doc['id'] ai_file_id = doc.get('aiFileId') if not ai_file_id: return try: await xai.remove_from_collection(collection_id, ai_file_id) self._log.info(f" 🗑️ Removed {doc.get('name')} from xAI collection") await espocrm.update_entity('CDokumente', doc_id, { 'aiFileId': None, 'aiSyncStatus': 'new', 'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), }) except Exception as e: self._log.warn(f" ⚠️ Could not remove from xAI: {e}") class XAIProviderAdapter: """ Adapter der XAIService auf das Provider-Interface bringt, das AIKnowledgeSyncUtils erwartet. Interface (identisch mit RAGFlowService): ensure_dataset(name, description) -> dict mit 'id' list_documents(dataset_id) -> list[dict] mit 'id', 'name' upload_document(dataset_id, file_content, filename, mime_type, blake3_hash, espocrm_id, description, advoware_art, advoware_bemerkung) -> dict mit 'id' update_document_meta(dataset_id, doc_id, ...) -> None remove_document(dataset_id, doc_id) -> None delete_dataset(dataset_id) -> None is_mime_type_supported(mime_type) -> bool """ def __init__(self, ctx=None): from services.xai_service import XAIService from services.logging_utils import get_service_logger self._xai = XAIService(ctx) self._log = get_service_logger('xai_adapter', ctx) async def ensure_dataset(self, name: str, description: str = '') -> dict: """Erstellt oder verifiziert eine xAI Collection. Gibt {'id': collection_id} zurueck.""" existing = await self._xai.get_collection_by_name(name) if existing: col_id = existing.get('collection_id') or existing.get('id') return {'id': col_id, 'name': name} result = await self._xai.create_collection(name=name) col_id = result.get('collection_id') or result.get('id') return {'id': col_id, 'name': name} async def list_documents(self, dataset_id: str) -> list: """Listet alle Dokumente in einer xAI Collection auf.""" raw = await self._xai.list_collection_documents(dataset_id) return [{'id': d.get('file_id'), 'name': d.get('filename')} for d in raw] async def upload_document( self, dataset_id: str, file_content: bytes, filename: str, mime_type: str = 'application/octet-stream', blake3_hash=None, espocrm_id=None, description=None, advoware_art=None, advoware_bemerkung=None, ) -> dict: """Laedt Dokument in xAI Collection mit Metadata-Fields.""" fields_raw = { 'document_name': filename, 'espocrm_id': espocrm_id or '', 'description': description or '', 'advoware_art': advoware_art or '', 'advoware_bemerkung': advoware_bemerkung or '', } if blake3_hash: fields_raw['blake3_hash'] = blake3_hash fields = {k: v for k, v in fields_raw.items() if v} file_id = await self._xai.upload_to_collection( collection_id=dataset_id, file_content=file_content, filename=filename, mime_type=mime_type, fields=fields, ) return {'id': file_id, 'name': filename} async def update_document_meta( self, dataset_id: str, doc_id: str, blake3_hash=None, description=None, advoware_art=None, advoware_bemerkung=None, ) -> None: """ xAI unterstuetzt kein PATCH fuer Metadaten. Re-Upload wird vom Caller gesteuert (via syncedMetadataHash Aenderung fuehrt zum vollstaendigen Upload-Path). Hier kein-op. """ self._log.warn( "XAIProviderAdapter.update_document_meta: xAI unterstuetzt kein " "Metadaten-PATCH – kein-op. Naechster Sync loest Re-Upload aus." ) async def remove_document(self, dataset_id: str, doc_id: str) -> None: """Loescht Dokument aus xAI Collection (Datei bleibt in xAI Files API).""" await self._xai.remove_from_collection(dataset_id, doc_id) async def delete_dataset(self, dataset_id: str) -> None: """Loescht xAI Collection.""" await self._xai.delete_collection(dataset_id) def is_mime_type_supported(self, mime_type: str) -> bool: return self._xai.is_mime_type_supported(mime_type)