feat(sync): Enhance Akte Sync with RAGflow support and improve error handling

This commit is contained in:
bsiggel
2026-03-26 23:09:42 +00:00
parent 1cd8de8574
commit 9bd62fc5ab

View File

@@ -4,7 +4,9 @@ Akte Sync - Event Handler
Unified sync for one CAkten entity across all configured backends: Unified sync for one CAkten entity across all configured backends:
- Advoware (3-way merge: Windows ↔ EspoCRM ↔ History) - Advoware (3-way merge: Windows ↔ EspoCRM ↔ History)
- xAI (Blake3 hash-based upload to Collection) - xAI (Blake3 hash-based upload to Collection)
- RAGflow (Dataset-based upload with laws chunk_method)
AI provider is selected via CAkten.aiProvider ('xai' or 'ragflow').
Both run in the same event to keep CDokumente perfectly in sync. Both run in the same event to keep CDokumente perfectly in sync.
Trigger: akte.sync { akte_id, aktennummer } Trigger: akte.sync { akte_id, aktennummer }
@@ -15,6 +17,8 @@ Enqueues:
- document.generate_preview (after CREATE / UPDATE_ESPO) - document.generate_preview (after CREATE / UPDATE_ESPO)
""" """
import traceback
import time
from typing import Dict, Any from typing import Dict, Any
from datetime import datetime from datetime import datetime
from motia import FlowContext, queue from motia import FlowContext, queue
@@ -22,7 +26,7 @@ from motia import FlowContext, queue
config = { config = {
"name": "Akte Sync - Event Handler", "name": "Akte Sync - Event Handler",
"description": "Unified sync for one Akte: Advoware 3-way merge + xAI upload", "description": "Unified sync for one Akte: Advoware 3-way merge + AI upload (xAI or RAGflow)",
"flows": ["akte-sync"], "flows": ["akte-sync"],
"triggers": [queue("akte.sync")], "triggers": [queue("akte.sync")],
"enqueues": ["document.generate_preview"], "enqueues": ["document.generate_preview"],
@@ -54,7 +58,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
return return
lock_key = f"akte_sync:{akte_id}" lock_key = f"akte_sync:{akte_id}"
lock_acquired = redis_client.set(lock_key, datetime.now().isoformat(), nx=True, ex=600) lock_acquired = redis_client.set(lock_key, datetime.now().isoformat(), nx=True, ex=1800) # 30 min
if not lock_acquired: if not lock_acquired:
ctx.logger.warn(f"⏸️ Lock busy for Akte {akte_id} — requeueing") ctx.logger.warn(f"⏸️ Lock busy for Akte {akte_id} — requeueing")
raise RuntimeError(f"Lock busy for akte_id={akte_id}") raise RuntimeError(f"Lock busy for akte_id={akte_id}")
@@ -104,13 +108,21 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
advoware_results = None advoware_results = None
if advoware_enabled: if advoware_enabled:
advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs) advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs)
# Re-fetch docs after Advoware sync — newly created docs must be visible to AI sync
if ai_enabled and advoware_results and advoware_results.get('created', 0) > 0:
ctx.logger.info(
f" 🔄 Re-fetching docs after Advoware sync "
f"({advoware_results['created']} new doc(s) created)"
)
espo_docs = await espocrm.list_related_all('CAkten', akte_id, 'dokumentes')
# ── AI SYNC (xAI or RAGflow) ───────────────────────────────── # ── AI SYNC (xAI or RAGflow) ─────────────────────────────────
ai_had_failures = False
if ai_enabled: if ai_enabled:
if ai_provider.lower() == 'ragflow': if ai_provider.lower() == 'ragflow':
await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs) ai_had_failures = await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
else: else:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs) ai_had_failures = await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── Final Status ─────────────────────────────────────────────────── # ── Final Status ───────────────────────────────────────────────────
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S') now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -123,7 +135,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
final_update['aktivierungsstatus'] = 'active' final_update['aktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aktivierungsstatus: import → active") ctx.logger.info("🔄 aktivierungsstatus: import → active")
if ai_enabled: if ai_enabled:
final_update['aiSyncStatus'] = 'synced' final_update['aiSyncStatus'] = 'failed' if ai_had_failures else 'synced'
final_update['aiLastSync'] = now final_update['aiLastSync'] = now
# 'new' = Dataset/Collection erstmalig angelegt → auf 'aktiv' setzen # 'new' = Dataset/Collection erstmalig angelegt → auf 'aktiv' setzen
if ai_aktivierungsstatus == 'new': if ai_aktivierungsstatus == 'new':
@@ -143,11 +155,9 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
except Exception as e: except Exception as e:
ctx.logger.error(f"❌ Sync failed: {e}") ctx.logger.error(f"❌ Sync failed: {e}")
import traceback
ctx.logger.error(traceback.format_exc()) ctx.logger.error(traceback.format_exc())
# Requeue Advoware aktennummer for retry (Motia retries the akte.sync event itself) # Requeue Advoware aktennummer for retry (Motia retries the akte.sync event itself)
import time
if aktennummer: if aktennummer:
redis_client.zadd("advoware:pending_aktennummern", {aktennummer: time.time()}) redis_client.zadd("advoware:pending_aktennummern", {aktennummer: time.time()})
@@ -393,7 +403,7 @@ async def _run_xai_sync(
espocrm, espocrm,
ctx: FlowContext, ctx: FlowContext,
docs: list, docs: list,
) -> None: ) -> bool:
from services.xai_service import XAIService from services.xai_service import XAIService
from services.xai_upload_utils import XAIUploadUtils from services.xai_upload_utils import XAIUploadUtils
@@ -418,7 +428,7 @@ async def _run_xai_sync(
if not collection_id: if not collection_id:
ctx.logger.error("❌ xAI Collection konnte nicht erstellt werden — Sync abgebrochen") ctx.logger.error("❌ xAI Collection konnte nicht erstellt werden — Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
ctx.logger.info(f" ✅ Collection erstellt: {collection_id}") ctx.logger.info(f" ✅ Collection erstellt: {collection_id}")
# aiAktivierungsstatus → 'aktiv' wird in handler final_update gesetzt # aiAktivierungsstatus → 'aktiv' wird in handler final_update gesetzt
else: else:
@@ -428,7 +438,7 @@ async def _run_xai_sync(
f"xAI Sync abgebrochen. Bitte Collection-ID in EspoCRM eintragen." f"xAI Sync abgebrochen. Bitte Collection-ID in EspoCRM eintragen."
) )
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
else: else:
# Collection-ID vorhanden → verifizieren ob sie noch in xAI existiert # Collection-ID vorhanden → verifizieren ob sie noch in xAI existiert
try: try:
@@ -436,12 +446,12 @@ async def _run_xai_sync(
if not col: if not col:
ctx.logger.error(f"❌ Collection {collection_id} existiert nicht mehr in xAI — Sync abgebrochen") ctx.logger.error(f"❌ Collection {collection_id} existiert nicht mehr in xAI — Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
ctx.logger.info(f" ✅ Collection verifiziert: {collection_id}") ctx.logger.info(f" ✅ Collection verifiziert: {collection_id}")
except Exception as e: except Exception as e:
ctx.logger.error(f"❌ Collection-Verifizierung fehlgeschlagen: {e} — Sync abgebrochen") ctx.logger.error(f"❌ Collection-Verifizierung fehlgeschlagen: {e} — Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
ctx.logger.info(f" Documents to check: {len(docs)}") ctx.logger.info(f" Documents to check: {len(docs)}")
@@ -485,6 +495,7 @@ async def _run_xai_sync(
ctx.logger.info(f" ✅ Synced : {synced}") ctx.logger.info(f" ✅ Synced : {synced}")
ctx.logger.info(f" ⏭️ Skipped : {skipped}") ctx.logger.info(f" ⏭️ Skipped : {skipped}")
ctx.logger.info(f" ❌ Failed : {failed}") ctx.logger.info(f" ❌ Failed : {failed}")
return failed > 0
finally: finally:
await xai.close() await xai.close()
@@ -500,7 +511,7 @@ async def _run_ragflow_sync(
espocrm, espocrm,
ctx: FlowContext, ctx: FlowContext,
docs: list, docs: list,
) -> None: ) -> bool:
from services.ragflow_service import RAGFlowService from services.ragflow_service import RAGFlowService
from urllib.parse import unquote from urllib.parse import unquote
import mimetypes import mimetypes
@@ -512,6 +523,7 @@ async def _run_ragflow_sync(
ctx.logger.info("🧠 RAGflow SYNC") ctx.logger.info("🧠 RAGflow SYNC")
ctx.logger.info("━" * 60) ctx.logger.info("━" * 60)
try:
ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower() ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
dataset_id = akte.get('aiCollectionId') dataset_id = akte.get('aiCollectionId')
@@ -524,7 +536,7 @@ async def _run_ragflow_sync(
if not dataset_info or not dataset_info.get('id'): if not dataset_info or not dataset_info.get('id'):
ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden — Sync abgebrochen") ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden — Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
dataset_id = dataset_info['id'] dataset_id = dataset_info['id']
ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}") ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}")
await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id}) await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
@@ -534,13 +546,12 @@ async def _run_ragflow_sync(
f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen." f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
) )
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
ctx.logger.info(f" Dataset-ID : {dataset_id}") ctx.logger.info(f" Dataset-ID : {dataset_id}")
ctx.logger.info(f" EspoCRM docs: {len(docs)}") ctx.logger.info(f" EspoCRM docs: {len(docs)}")
# ── RAGflow-Bestand abrufen (source of truth) ───────────────────────── # ── RAGflow-Bestand abrufen (source of truth) ─────────────────────────
# Lookup: espocrm_id → ragflow_doc (nur Docs die mit espocrm_id getaggt sind)
ragflow_by_espocrm_id: Dict[str, Any] = {} ragflow_by_espocrm_id: Dict[str, Any] = {}
try: try:
ragflow_docs = await ragflow.list_documents(dataset_id) ragflow_docs = await ragflow.list_documents(dataset_id)
@@ -552,7 +563,7 @@ async def _run_ragflow_sync(
except Exception as e: except Exception as e:
ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}") ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'}) await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return return True # had failures
# ── Orphan-Cleanup: RAGflow-Docs die kein EspoCRM-Äquivalent mehr haben ── # ── Orphan-Cleanup: RAGflow-Docs die kein EspoCRM-Äquivalent mehr haben ──
espocrm_ids_set = {d['id'] for d in docs} espocrm_ids_set = {d['id'] for d in docs}
@@ -603,8 +614,7 @@ async def _run_ragflow_sync(
f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..." f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
) )
if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash: if not ragflow_doc_id and not blake3_hash:
# Kein Attachment-Hash vorhanden und noch nie in RAGflow → unsupported
ctx.logger.info(f" ⏭️ Kein Blake3-Hash — übersprungen") ctx.logger.info(f" ⏭️ Kein Blake3-Hash — übersprungen")
skipped += 1 skipped += 1
continue continue
@@ -640,7 +650,6 @@ async def _run_ragflow_sync(
elif ragflow_doc_id and not content_changed and not meta_changed: elif ragflow_doc_id and not content_changed and not meta_changed:
# ── Vollständig unverändert → Skip ──────────────────────── # ── Vollständig unverändert → Skip ────────────────────────
ctx.logger.info(f" ✅ Unverändert — kein Re-Upload") ctx.logger.info(f" ✅ Unverändert — kein Re-Upload")
# Tracking-Felder in EspoCRM aktuell halten
await espocrm.update_entity('CDokumente', doc_id, { await espocrm.update_entity('CDokumente', doc_id, {
'aiFileId': ragflow_doc_id, 'aiFileId': ragflow_doc_id,
'aiCollectionId': dataset_id, 'aiCollectionId': dataset_id,
@@ -701,3 +710,13 @@ async def _run_ragflow_sync(
ctx.logger.info(f" ✅ Synced : {synced}") ctx.logger.info(f" ✅ Synced : {synced}")
ctx.logger.info(f" ⏭️ Skipped : {skipped}") ctx.logger.info(f" ⏭️ Skipped : {skipped}")
ctx.logger.info(f" ❌ Failed : {failed}") ctx.logger.info(f" ❌ Failed : {failed}")
return failed > 0
except Exception as e:
ctx.logger.error(f"❌ RAGflow Sync unerwarteter Fehler: {e}")
ctx.logger.error(traceback.format_exc())
try:
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
except Exception:
pass
return True # had failures