feat(sync): Implement orphan cleanup for xAI documents without EspoCRM equivalents

This commit is contained in:
bsiggel
2026-03-26 14:20:33 +00:00
parent 1e202a6233
commit c9bdd021e4
3 changed files with 142 additions and 52 deletions

View File

@@ -60,12 +60,6 @@ class XAIUploadUtils:
self._log.info(f"Creating xAI collection for '{akte_name}'...")
col = await xai.create_collection(
name=akte_name,
metadata={
'espocrm_entity_type': 'CAkten',
'espocrm_entity_id': akte_id,
'aktenzeichen': str(akte.get('aktennummer', '')),
'rubrum': str(akte.get('rubrum', '') or ''),
}
)
collection_id = col.get('collection_id') or col.get('id')
self._log.info(f"✅ Collection created: {collection_id}")
@@ -110,9 +104,20 @@ class XAIUploadUtils:
self._log.info(f" 📄 {doc_name}")
self._log.info(f" aiSyncStatus={ai_status}, aiSyncHash={ai_sync_hash[:12] if ai_sync_hash else 'N/A'}..., blake3={blake3_hash[:12] if blake3_hash else 'N/A'}...")
# Skip if already synced and hash matches
# File content unchanged (hash match) → no re-upload needed
if ai_status == 'synced' and ai_sync_hash and blake3_hash and ai_sync_hash == blake3_hash:
self._log.info(f" ⏭️ Skipped (hash match, no change)")
if ai_file_id:
# Custom metadata (fields) cannot be changed after the upload.
# Only the file name can be changed via PUT /v1/files/{id}.
current_name = doc.get('dokumentName') or doc.get('name', '')
if current_name and ai_file_id:
try:
await xai.rename_file(ai_file_id, current_name)
except Exception as e:
self._log.warn(f" ⚠️ Rename fehlgeschlagen (non-fatal): {e}")
self._log.info(f" ✅ Unverändert kein Re-Upload (hash match)")
else:
self._log.info(f" ⏭️ Skipped (hash match, kein aiFileId)")
return True
# Get attachment info
@@ -149,27 +154,24 @@ class XAIUploadUtils:
except Exception:
pass # Non-fatal - may already be gone
# Upload to xAI
self._log.info(f" 📤 Uploading '{filename}' ({mime_type})...")
new_xai_file_id = await xai.upload_file(file_content, filename, mime_type)
self._log.info(f" Uploaded: xai_file_id={new_xai_file_id}")
# Build metadata fields — they are set once at upload time;
# custom fields can NOT be updated afterwards.
fields = {
'document_name': doc.get('name', filename),
'description': str(doc.get('beschreibung', '') or ''),
'advoware_art': str(doc.get('advowareArt', '') or ''),
'advoware_bemerkung': str(doc.get('advowareBemerkung', '') or ''),
'espocrm_id': doc['id'],
'created_at': str(doc.get('createdAt', '') or ''),
'modified_at': str(doc.get('modifiedAt', '') or ''),
}
# Add to collection
await xai.add_to_collection(collection_id, new_xai_file_id)
self._log.info(f" ✅ Added to collection {collection_id}")
# Set document metadata (injected into chunks for better AI context)
try:
await xai.update_document_metadata(collection_id, new_xai_file_id, {
'document_name': doc.get('name', filename),
'description': str(doc.get('beschreibung', '') or ''),
'advoware_art': str(doc.get('advowareArt', '') or ''),
'advoware_bemerkung': str(doc.get('advowareBemerkung', '') or ''),
'espocrm_id': doc['id'],
})
self._log.info(f" ✅ Dokument-Metadaten gesetzt")
except Exception as meta_err:
self._log.warn(f" ⚠️ Metadaten-Update fehlgeschlagen (non-fatal): {meta_err}")
# Single-request upload directly to collection incl. metadata fields
self._log.info(f" 📤 Uploading '{filename}' ({mime_type}) with metadata...")
new_xai_file_id = await xai.upload_to_collection(
collection_id, file_content, filename, mime_type, fields=fields
)
self._log.info(f" ✅ Uploaded + metadata set: {new_xai_file_id}")
# Update CDokumente with sync result
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')