Refactor code structure for improved readability and maintainability

This commit is contained in:
bsiggel
2026-03-26 22:24:07 +00:00
parent 9b2fb5ae4a
commit 1cd8de8574
10 changed files with 265 additions and 1313 deletions

View File

@@ -76,26 +76,28 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
sync_schalter = akte.get('syncSchalter', False)
aktivierungsstatus = str(akte.get('aktivierungsstatus') or '').lower()
ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
ai_provider = str(akte.get('aiProvider') or 'xAI')
ctx.logger.info(f"📋 Akte '{akte.get('name')}'")
ctx.logger.info(f" syncSchalter : {sync_schalter}")
ctx.logger.info(f" aktivierungsstatus : {aktivierungsstatus}")
ctx.logger.info(f" aiAktivierungsstatus : {ai_aktivierungsstatus}")
ctx.logger.info(f" aiProvider : {ai_provider}")
# Advoware sync requires an aktennummer (Akten without Advoware won't have one)
advoware_enabled = bool(aktennummer) and sync_schalter and aktivierungsstatus in VALID_ADVOWARE_STATUSES
xai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ctx.logger.info(f" Advoware sync : {'✅ ON' if advoware_enabled else '⏭️ OFF'}")
ctx.logger.info(f" xAI sync : {'✅ ON' if xai_enabled else '⏭️ OFF'}")
ctx.logger.info(f" AI sync ({ai_provider}) : {'✅ ON' if ai_enabled else '⏭️ OFF'}")
if not advoware_enabled and not xai_enabled:
if not advoware_enabled and not ai_enabled:
ctx.logger.info("⏭️ Both syncs disabled nothing to do")
return
# ── Load CDokumente once (shared by Advoware + xAI sync) ─────────────────
espo_docs: list = []
if advoware_enabled or xai_enabled:
if advoware_enabled or ai_enabled:
espo_docs = await espocrm.list_related_all('CAkten', akte_id, 'dokumentes')
# ── ADVOWARE SYNC ────────────────────────────────────────────
@@ -103,9 +105,12 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if advoware_enabled:
advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs)
# ── xAI SYNC ────────────────────────────────────────────────
if xai_enabled:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── AI SYNC (xAI or RAGflow) ─────────────────────────────────
if ai_enabled:
if ai_provider.lower() == 'ragflow':
await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
else:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── Final Status ───────────────────────────────────────────────────
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -117,10 +122,10 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if aktivierungsstatus == 'import':
final_update['aktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aktivierungsstatus: import → active")
if xai_enabled:
if ai_enabled:
final_update['aiSyncStatus'] = 'synced'
final_update['aiLastSync'] = now
# 'new' = Collection wurde gerade erstmalig angelegt → auf 'aktiv' setzen
# 'new' = Dataset/Collection erstmalig angelegt → auf 'aktiv' setzen
if ai_aktivierungsstatus == 'new':
final_update['aiAktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aiAktivierungsstatus: new → active")
@@ -483,3 +488,216 @@ async def _run_xai_sync(
finally:
await xai.close()
# ─────────────────────────────────────────────────────────────────────────────
# RAGflow sync
# ─────────────────────────────────────────────────────────────────────────────
async def _run_ragflow_sync(
akte: Dict[str, Any],
akte_id: str,
espocrm,
ctx: FlowContext,
docs: list,
) -> None:
from services.ragflow_service import RAGFlowService
from urllib.parse import unquote
import mimetypes
ragflow = RAGFlowService(ctx)
ctx.logger.info("")
ctx.logger.info("" * 60)
ctx.logger.info("🧠 RAGflow SYNC")
ctx.logger.info("" * 60)
ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
dataset_id = akte.get('aiCollectionId')
# ── Ensure dataset exists ─────────────────────────────────────────────
if not dataset_id:
if ai_aktivierungsstatus == 'new':
akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
ctx.logger.info(f" Status 'new' → Erstelle neues RAGflow Dataset für '{akte_name}'...")
dataset_info = await ragflow.ensure_dataset(akte_name)
if not dataset_info or not dataset_info.get('id'):
ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden Sync abgebrochen")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
dataset_id = dataset_info['id']
ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}")
await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
else:
ctx.logger.error(
f"❌ aiAktivierungsstatus='{ai_aktivierungsstatus}' aber keine aiCollectionId "
f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
)
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
ctx.logger.info(f" Dataset-ID : {dataset_id}")
ctx.logger.info(f" EspoCRM docs: {len(docs)}")
# ── RAGflow-Bestand abrufen (source of truth) ─────────────────────────
# Lookup: espocrm_id → ragflow_doc (nur Docs die mit espocrm_id getaggt sind)
ragflow_by_espocrm_id: Dict[str, Any] = {}
try:
ragflow_docs = await ragflow.list_documents(dataset_id)
ctx.logger.info(f" RAGflow docs: {len(ragflow_docs)}")
for rd in ragflow_docs:
eid = rd.get('espocrm_id')
if eid:
ragflow_by_espocrm_id[eid] = rd
except Exception as e:
ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
return
# ── Orphan-Cleanup: RAGflow-Docs die kein EspoCRM-Äquivalent mehr haben ──
espocrm_ids_set = {d['id'] for d in docs}
for rd in ragflow_docs:
eid = rd.get('espocrm_id')
if eid and eid not in espocrm_ids_set:
try:
await ragflow.remove_document(dataset_id, rd['id'])
ctx.logger.info(f" 🗑️ Orphan gelöscht: {rd.get('name', rd['id'])} (espocrm_id={eid})")
except Exception as e:
ctx.logger.warn(f" ⚠️ Orphan-Delete fehlgeschlagen: {e}")
synced = 0
skipped = 0
failed = 0
for doc in docs:
doc_id = doc['id']
doc_name = doc.get('name', doc_id)
blake3_hash = doc.get('blake3hash') or ''
# Was ist aktuell in RAGflow für dieses Dokument?
ragflow_doc = ragflow_by_espocrm_id.get(doc_id)
ragflow_doc_id = ragflow_doc['id'] if ragflow_doc else None
ragflow_blake3 = ragflow_doc.get('blake3_hash', '') if ragflow_doc else ''
ragflow_meta = ragflow_doc.get('meta_fields', {}) if ragflow_doc else {}
# Aktuelle Metadaten aus EspoCRM
current_description = str(doc.get('beschreibung') or '')
current_advo_art = str(doc.get('advowareArt') or '')
current_advo_bemerk = str(doc.get('advowareBemerkung') or '')
content_changed = blake3_hash != ragflow_blake3
meta_changed = (
ragflow_meta.get('description', '') != current_description or
ragflow_meta.get('advoware_art', '') != current_advo_art or
ragflow_meta.get('advoware_bemerkung', '') != current_advo_bemerk
)
ctx.logger.info(f" 📄 {doc_name}")
ctx.logger.info(
f" in_ragflow={bool(ragflow_doc_id)}, "
f"content_changed={content_changed}, meta_changed={meta_changed}"
)
if ragflow_doc_id:
ctx.logger.info(
f" ragflow_blake3={ragflow_blake3[:12] if ragflow_blake3 else 'N/A'}..., "
f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
)
if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash:
# Kein Attachment-Hash vorhanden und noch nie in RAGflow → unsupported
ctx.logger.info(f" ⏭️ Kein Blake3-Hash übersprungen")
skipped += 1
continue
attachment_id = doc.get('dokumentId')
if not attachment_id:
ctx.logger.warn(f" ⚠️ Kein Attachment (dokumentId fehlt) unsupported")
await espocrm.update_entity('CDokumente', doc_id, {
'aiSyncStatus': 'unsupported',
'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
})
skipped += 1
continue
filename = unquote(doc.get('dokumentName') or doc.get('name') or 'document.bin')
mime_type, _ = mimetypes.guess_type(filename)
if not mime_type:
mime_type = 'application/octet-stream'
try:
if ragflow_doc_id and not content_changed and meta_changed:
# ── Nur Metadaten aktualisieren ───────────────────────────
ctx.logger.info(f" 🔄 Metadata-Update für {ragflow_doc_id}")
await ragflow.update_document_meta(
dataset_id, ragflow_doc_id,
blake3_hash=blake3_hash,
description=current_description,
advoware_art=current_advo_art,
advoware_bemerkung=current_advo_bemerk,
)
new_ragflow_id = ragflow_doc_id
elif ragflow_doc_id and not content_changed and not meta_changed:
# ── Vollständig unverändert → Skip ────────────────────────
ctx.logger.info(f" ✅ Unverändert kein Re-Upload")
# Tracking-Felder in EspoCRM aktuell halten
await espocrm.update_entity('CDokumente', doc_id, {
'aiFileId': ragflow_doc_id,
'aiCollectionId': dataset_id,
'aiSyncHash': blake3_hash,
'aiSyncStatus': 'synced',
})
skipped += 1
continue
else:
# ── Upload (neu oder Inhalt geändert) ─────────────────────
if ragflow_doc_id and content_changed:
ctx.logger.info(f" 🗑️ Inhalt geändert altes Dokument löschen: {ragflow_doc_id}")
try:
await ragflow.remove_document(dataset_id, ragflow_doc_id)
except Exception:
pass
ctx.logger.info(f" 📥 Downloading {filename} ({attachment_id})…")
file_content = await espocrm.download_attachment(attachment_id)
ctx.logger.info(f" Downloaded {len(file_content)} bytes")
ctx.logger.info(f" 📤 Uploading '{filename}' ({mime_type})…")
result = await ragflow.upload_document(
dataset_id=dataset_id,
file_content=file_content,
filename=filename,
mime_type=mime_type,
blake3_hash=blake3_hash,
espocrm_id=doc_id,
description=current_description,
advoware_art=current_advo_art,
advoware_bemerkung=current_advo_bemerk,
)
if not result or not result.get('id'):
raise RuntimeError("upload_document gab kein Ergebnis zurück")
new_ragflow_id = result['id']
ctx.logger.info(f" ✅ RAGflow-ID: {new_ragflow_id}")
now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
await espocrm.update_entity('CDokumente', doc_id, {
'aiFileId': new_ragflow_id,
'aiCollectionId': dataset_id,
'aiSyncHash': blake3_hash,
'aiSyncStatus': 'synced',
'aiLastSync': now_str,
})
synced += 1
except Exception as e:
ctx.logger.error(f" ❌ Fehlgeschlagen: {e}")
await espocrm.update_entity('CDokumente', doc_id, {
'aiSyncStatus': 'failed',
'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
})
failed += 1
ctx.logger.info(f" ✅ Synced : {synced}")
ctx.logger.info(f" ⏭️ Skipped : {skipped}")
ctx.logger.info(f" ❌ Failed : {failed}")