Files
motia-iii/services/xai_upload_utils.py

218 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
xAI Upload Utilities
Shared logic for uploading documents from EspoCRM to xAI Collections.
Used by all sync flows (Advoware + direct xAI sync).
Handles:
- Blake3 hash-based change detection
- Upload to xAI with correct filename/MIME
- Collection management (create/verify)
- EspoCRM metadata update after sync
"""
from typing import Optional, Dict, Any
from datetime import datetime
class XAIUploadUtils:
    """
    Helper for syncing EspoCRM documents into xAI collections.

    The instance only holds a context-bound logger. The xAI and EspoCRM
    service clients are passed explicitly to every method so the same
    helper can be reused across different sync flows.
    """

    # Timestamp format written to the EspoCRM ``aiLastSync`` field.
    _TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, ctx):
        """Create the helper and bind a service logger to ``ctx``."""
        from services.logging_utils import get_service_logger
        self._log = get_service_logger(__name__, ctx)

    @staticmethod
    def _timestamp() -> str:
        """Return the current time formatted for the ``aiLastSync`` field."""
        # NOTE(review): naive local time, as in the original code - confirm
        # whether EspoCRM expects UTC for this field.
        return datetime.now().strftime(XAIUploadUtils._TIMESTAMP_FORMAT)

    @staticmethod
    def _build_metadata_fields(doc: Dict[str, Any], filename: str) -> Dict[str, str]:
        """Build the custom metadata fields that are sent along with an upload."""
        return {
            # Fall back to the filename when the name is missing OR null.
            'document_name': doc.get('name') or filename,
            'description': str(doc.get('beschreibung', '') or ''),
            'advoware_art': str(doc.get('advowareArt', '') or ''),
            'advoware_bemerkung': str(doc.get('advowareBemerkung', '') or ''),
            'espocrm_id': doc['id'],
            'created_at': str(doc.get('createdAt', '') or ''),
            'modified_at': str(doc.get('modifiedAt', '') or ''),
        }

    async def ensure_collection(
        self,
        akte: Dict[str, Any],
        xai,
        espocrm,
    ) -> Optional[str]:
        """
        Ensure an xAI collection exists for this Akte.

        Verifies the collection referenced by ``aiCollectionId`` if one is
        set; otherwise (or if verification fails) creates a new collection
        and stores its id on the ``CAkten`` entity, setting ``aiSyncStatus``
        to ``'unclean'``.

        Args:
            akte: Akte entity dict; must contain ``id``. ``name``,
                ``aktennummer`` and ``aiCollectionId`` are optional.
            xai: Service exposing ``get_collection`` and ``create_collection``.
            espocrm: Service exposing ``update_entity``.

        Returns:
            The collection id, or None if the collection could not be
            created or stored.
        """
        akte_id = akte['id']

        # ``dict.get(key, default)`` does not apply the default when the key
        # is present with a null value, so fall back explicitly.
        akte_name = akte.get('name')
        if not akte_name:
            aktennummer = akte.get('aktennummer')
            if aktennummer is None or aktennummer == '':
                aktennummer = akte_id
            akte_name = f"Akte {aktennummer}"

        collection_id = akte.get('aiCollectionId')
        if collection_id:
            # Verify it still exists in xAI.
            # NOTE(review): any verification error (including transient ones)
            # falls through to creating a new collection - confirm intended.
            try:
                col = await xai.get_collection(collection_id)
            except Exception as e:
                self._log.warn(f"Could not verify collection {collection_id}: {e}, recreating...")
            else:
                if col:
                    self._log.debug(f"Collection {collection_id} verified for '{akte_name}'")
                    return collection_id
                self._log.warn(f"Collection {collection_id} not found in xAI, recreating...")

        # Create a new collection.
        try:
            self._log.info(f"Creating xAI collection for '{akte_name}'...")
            col = await xai.create_collection(name=akte_name)
            collection_id = col.get('collection_id') or col.get('id')
            if not collection_id:
                # Never report success or persist a null id to EspoCRM.
                raise ValueError(f"xAI response contains no collection id: {col!r}")
            self._log.info(f"✅ Collection created: {collection_id}")
            # Save back to EspoCRM; 'unclean' triggers a full document sync.
            await espocrm.update_entity('CAkten', akte_id, {
                'aiCollectionId': collection_id,
                'aiSyncStatus': 'unclean',
            })
            return collection_id
        except Exception as e:
            self._log.error(f"❌ Failed to create xAI collection: {e}")
            return None

    async def sync_document_to_xai(
        self,
        doc: Dict[str, Any],
        collection_id: str,
        xai,
        espocrm,
    ) -> bool:
        """
        Sync a single CDokumente entity to an xAI collection.

        Decision logic (Blake3-based):
        - aiSyncStatus == 'synced' AND aiSyncHash == blake3hash -> skip the
          upload (only the filename is refreshed when an aiFileId exists)
        - no attachment (``dokumentId`` missing) -> mark 'unsupported'
        - anything else -> download from EspoCRM and (re-)upload

        Args:
            doc: CDokumente entity dict; must contain ``id``.
            collection_id: Target xAI collection id.
            xai: Service exposing ``rename_file``, ``remove_from_collection``
                and ``upload_to_collection``.
            espocrm: Service exposing ``download_attachment`` and
                ``update_entity``.

        Returns:
            True if the document was synced, skipped or marked unsupported;
            False if download, upload or the result update failed.

        Raises:
            Exception: whatever ``espocrm.update_entity`` raises while
                marking a document without attachment as 'unsupported'
                (that call is not guarded).
        """
        import mimetypes
        from urllib.parse import unquote

        doc_id = doc['id']
        doc_name = doc.get('name') or doc_id
        ai_status = doc.get('aiSyncStatus', 'new')
        ai_sync_hash = doc.get('aiSyncHash')
        blake3_hash = doc.get('blake3hash')
        ai_file_id = doc.get('aiFileId')

        sync_hash_preview = ai_sync_hash[:12] if ai_sync_hash else 'N/A'
        blake3_preview = blake3_hash[:12] if blake3_hash else 'N/A'
        self._log.info(f" 📄 {doc_name}")
        self._log.info(
            f" aiSyncStatus={ai_status}, aiSyncHash={sync_hash_preview}..., "
            f"blake3={blake3_preview}..."
        )

        # File content unchanged (hash match) -> no re-upload needed.
        if ai_status == 'synced' and ai_sync_hash and blake3_hash and ai_sync_hash == blake3_hash:
            if ai_file_id:
                # Custom metadata fields cannot be changed after the upload;
                # only the filename can be updated, so keep it current.
                current_name = doc.get('dokumentName') or doc.get('name')
                if current_name:
                    try:
                        await xai.rename_file(ai_file_id, current_name)
                    except Exception as e:
                        self._log.warn(f" ⚠️ Rename fehlgeschlagen (non-fatal): {e}")
                self._log.info(" ✅ Unverändert kein Re-Upload (hash match)")
            else:
                self._log.info(" ⏭️ Skipped (hash match, kein aiFileId)")
            return True

        # Without an attachment there is nothing to upload.
        attachment_id = doc.get('dokumentId')
        if not attachment_id:
            self._log.warn(" ⚠️ No attachment (dokumentId missing) - marking unsupported")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'unsupported',
                'aiLastSync': self._timestamp(),
            })
            return True  # Not an error, just unsupported

        try:
            # Download from EspoCRM.
            self._log.info(f" 📥 Downloading attachment {attachment_id}...")
            file_content = await espocrm.download_attachment(attachment_id)
            self._log.info(f" Downloaded {len(file_content)} bytes")

            # Determine filename and MIME type. The chained ``or`` also covers
            # a ``name`` key that is present but null, which would otherwise
            # make ``unquote`` raise.
            filename = unquote(
                doc.get('dokumentName') or doc.get('name') or 'document.bin'
            )
            mime_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream'

            # Remove the old file from the collection when updating.
            if ai_file_id and ai_status != 'new':
                try:
                    await xai.remove_from_collection(collection_id, ai_file_id)
                    self._log.info(f" 🗑️ Removed old xAI file {ai_file_id}")
                except Exception as e:
                    # Non-fatal - the file may already be gone.
                    self._log.debug(f" Could not remove old xAI file {ai_file_id}: {e}")

            # Metadata fields are set once at upload time; custom fields
            # cannot be updated afterwards.
            fields = self._build_metadata_fields(doc, filename)

            # Single-request upload directly to the collection incl. metadata.
            self._log.info(f" 📤 Uploading '{filename}' ({mime_type}) with metadata...")
            new_xai_file_id = await xai.upload_to_collection(
                collection_id, file_content, filename, mime_type, fields=fields
            )
            self._log.info(f" ✅ Uploaded + metadata set: {new_xai_file_id}")

            # Record the sync result on the CDokumente entity.
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiFileId': new_xai_file_id,
                'aiCollectionId': collection_id,
                'aiSyncHash': blake3_hash or doc.get('syncedHash'),
                'aiSyncStatus': 'synced',
                'aiLastSync': self._timestamp(),
            })
            self._log.info(" ✅ EspoCRM updated")
            return True
        except Exception as e:
            self._log.error(f" ❌ Failed: {e}")
            # Recording the failure is best-effort: a second error here must
            # not escape and break the "False on error" contract.
            try:
                await espocrm.update_entity('CDokumente', doc_id, {
                    'aiSyncStatus': 'failed',
                    'aiLastSync': self._timestamp(),
                })
            except Exception as update_error:
                self._log.error(
                    f" ❌ Could not mark document as failed in EspoCRM: {update_error}"
                )
            return False

    async def remove_document_from_xai(
        self,
        doc: Dict[str, Any],
        collection_id: str,
        xai,
        espocrm,
    ) -> None:
        """
        Remove a CDokumente from its xAI collection (called on DELETE).

        Does nothing when the document has no ``aiFileId``. On success the
        entity's ``aiFileId`` is cleared and ``aiSyncStatus`` is reset to
        ``'new'``. Errors are logged as warnings and not raised.

        Args:
            doc: CDokumente entity dict; must contain ``id``.
            collection_id: xAI collection the file belongs to.
            xai: Service exposing ``remove_from_collection``.
            espocrm: Service exposing ``update_entity``.
        """
        doc_id = doc['id']
        ai_file_id = doc.get('aiFileId')
        if not ai_file_id:
            return

        try:
            await xai.remove_from_collection(collection_id, ai_file_id)
            self._log.info(f" 🗑️ Removed {doc.get('name')} from xAI collection")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiFileId': None,
                'aiSyncStatus': 'new',
                'aiLastSync': self._timestamp(),
            })
        except Exception as e:
            self._log.warn(f" ⚠️ Could not remove from xAI: {e}")