feat(sync): Enhance Akte Sync with RAGflow support and improve error handling

This commit is contained in:
bsiggel
2026-03-26 23:09:42 +00:00
parent 1cd8de8574
commit 9bd62fc5ab

View File

@@ -4,7 +4,9 @@ Akte Sync - Event Handler
Unified sync for one CAkten entity across all configured backends:
- Advoware (3-way merge: Windows ↔ EspoCRM ↔ History)
- xAI (Blake3 hash-based upload to Collection)
- RAGflow (Dataset-based upload with laws chunk_method)
AI provider is selected via CAkten.aiProvider ('xai' or 'ragflow').
Both run in the same event to keep CDokumente perfectly in sync.
Trigger: akte.sync { akte_id, aktennummer }
@@ -15,6 +17,8 @@ Enqueues:
- document.generate_preview (after CREATE / UPDATE_ESPO)
"""
import traceback
import time
from typing import Dict, Any
from datetime import datetime
from motia import FlowContext, queue
@@ -22,7 +26,7 @@ from motia import FlowContext, queue
config = {
"name": "Akte Sync - Event Handler",
"description": "Unified sync for one Akte: Advoware 3-way merge + xAI upload",
"description": "Unified sync for one Akte: Advoware 3-way merge + AI upload (xAI or RAGflow)",
"flows": ["akte-sync"],
"triggers": [queue("akte.sync")],
"enqueues": ["document.generate_preview"],
@@ -54,7 +58,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
return
lock_key = f"akte_sync:{akte_id}"
lock_acquired = redis_client.set(lock_key, datetime.now().isoformat(), nx=True, ex=600)
lock_acquired = redis_client.set(lock_key, datetime.now().isoformat(), nx=True, ex=1800) # 30 min
if not lock_acquired:
ctx.logger.warn(f"⏸️ Lock busy for Akte {akte_id} requeueing")
raise RuntimeError(f"Lock busy for akte_id={akte_id}")
@@ -104,13 +108,21 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
advoware_results = None
if advoware_enabled:
advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs)
# Re-fetch docs after Advoware sync newly created docs must be visible to AI sync
if ai_enabled and advoware_results and advoware_results.get('created', 0) > 0:
ctx.logger.info(
f" 🔄 Re-fetching docs after Advoware sync "
f"({advoware_results['created']} new doc(s) created)"
)
espo_docs = await espocrm.list_related_all('CAkten', akte_id, 'dokumentes')
# ── AI SYNC (xAI or RAGflow) ─────────────────────────────────
ai_had_failures = False
if ai_enabled:
if ai_provider.lower() == 'ragflow':
await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
ai_had_failures = await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
else:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
ai_had_failures = await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── Final Status ───────────────────────────────────────────────────
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -123,7 +135,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
final_update['aktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aktivierungsstatus: import → active")
if ai_enabled:
final_update['aiSyncStatus'] = 'synced'
final_update['aiSyncStatus'] = 'failed' if ai_had_failures else 'synced'
final_update['aiLastSync'] = now
# 'new' = Dataset/Collection erstmalig angelegt → auf 'aktiv' setzen
if ai_aktivierungsstatus == 'new':
@@ -143,11 +155,9 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
except Exception as e:
ctx.logger.error(f"❌ Sync failed: {e}")
import traceback
ctx.logger.error(traceback.format_exc())
# Requeue Advoware aktennummer for retry (Motia retries the akte.sync event itself)
import time
if aktennummer:
redis_client.zadd("advoware:pending_aktennummern", {aktennummer: time.time()})
@@ -393,7 +403,7 @@ async def _run_xai_sync(
espocrm,
ctx: FlowContext,
docs: list,
) -> None:
) -> bool:
from services.xai_service import XAIService
from services.xai_upload_utils import XAIUploadUtils
@@ -418,7 +428,7 @@ async def _run_xai_sync(
if not collection_id:
ctx.logger.error("❌ xAI Collection konnte nicht erstellt werden Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
ctx.logger.info(f" ✅ Collection erstellt: {collection_id}")
# aiAktivierungsstatus → 'aktiv' wird in handler final_update gesetzt
else:
@@ -428,7 +438,7 @@ async def _run_xai_sync(
f"xAI Sync abgebrochen. Bitte Collection-ID in EspoCRM eintragen."
)
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
else:
# Collection-ID vorhanden → verifizieren ob sie noch in xAI existiert
try:
@@ -436,12 +446,12 @@ async def _run_xai_sync(
if not col:
ctx.logger.error(f"❌ Collection {collection_id} existiert nicht mehr in xAI Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
ctx.logger.info(f" ✅ Collection verifiziert: {collection_id}")
except Exception as e:
ctx.logger.error(f"❌ Collection-Verifizierung fehlgeschlagen: {e} Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
ctx.logger.info(f" Documents to check: {len(docs)}")
@@ -485,6 +495,7 @@ async def _run_xai_sync(
ctx.logger.info(f" ✅ Synced : {synced}")
ctx.logger.info(f" ⏭️ Skipped : {skipped}")
ctx.logger.info(f" ❌ Failed : {failed}")
return failed > 0
finally:
await xai.close()
@@ -500,7 +511,7 @@ async def _run_ragflow_sync(
espocrm,
ctx: FlowContext,
docs: list,
) -> None:
) -> bool:
from services.ragflow_service import RAGFlowService
from urllib.parse import unquote
import mimetypes
@@ -512,6 +523,7 @@ async def _run_ragflow_sync(
ctx.logger.info("🧠 RAGflow SYNC")
ctx.logger.info("" * 60)
try:
ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
dataset_id = akte.get('aiCollectionId')
@@ -524,7 +536,7 @@ async def _run_ragflow_sync(
if not dataset_info or not dataset_info.get('id'):
ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
dataset_id = dataset_info['id']
ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}")
await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
@@ -534,13 +546,12 @@ async def _run_ragflow_sync(
f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
)
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
ctx.logger.info(f" Dataset-ID : {dataset_id}")
ctx.logger.info(f" EspoCRM docs: {len(docs)}")
# ── RAGflow-Bestand abrufen (source of truth) ─────────────────────────
# Lookup: espocrm_id → ragflow_doc (nur Docs die mit espocrm_id getaggt sind)
ragflow_by_espocrm_id: Dict[str, Any] = {}
try:
ragflow_docs = await ragflow.list_documents(dataset_id)
@@ -552,7 +563,7 @@ async def _run_ragflow_sync(
except Exception as e:
ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
return True # had failures
# ── Orphan-Cleanup: RAGflow-Docs die kein EspoCRM-Äquivalent mehr haben ──
espocrm_ids_set = {d['id'] for d in docs}
@@ -603,8 +614,7 @@ async def _run_ragflow_sync(
f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
)
if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash:
# Kein Attachment-Hash vorhanden und noch nie in RAGflow → unsupported
if not ragflow_doc_id and not blake3_hash:
ctx.logger.info(f" ⏭️ Kein Blake3-Hash übersprungen")
skipped += 1
continue
@@ -640,7 +650,6 @@ async def _run_ragflow_sync(
elif ragflow_doc_id and not content_changed and not meta_changed:
# ── Vollständig unverändert → Skip ────────────────────────
ctx.logger.info(f" ✅ Unverändert kein Re-Upload")
# Tracking-Felder in EspoCRM aktuell halten
await espocrm.update_entity('CDokumente', doc_id, {
'aiFileId': ragflow_doc_id,
'aiCollectionId': dataset_id,
@@ -701,3 +710,13 @@ async def _run_ragflow_sync(
ctx.logger.info(f" ✅ Synced : {synced}")
ctx.logger.info(f" ⏭️ Skipped : {skipped}")
ctx.logger.info(f" ❌ Failed : {failed}")
return failed > 0
except Exception as e:
ctx.logger.error(f"❌ RAGflow Sync unerwarteter Fehler: {e}")
ctx.logger.error(traceback.format_exc())
try:
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
except Exception:
pass
return True # had failures