Refactor code structure for improved readability and maintainability

This commit is contained in:
bsiggel
2026-03-26 22:24:07 +00:00
parent 9b2fb5ae4a
commit 1cd8de8574
10 changed files with 265 additions and 1313 deletions

View File

@@ -49,7 +49,7 @@ async def handler(request: ApiRequest, ctx: FlowContext) -> ApiResponse:
ctx.logger.error("❌ Invalid auth token")
ctx.logger.error(f" Expected: Bearer {expected_token[:10]}...")
ctx.logger.error(f" Received: {auth_header[:30]}...")
return ApiResponse(status_code=401, body={"error": "Unauthorized"})
return ApiResponse(status=401, body={"error": "Unauthorized"})
ctx.logger.info("✅ Auth-Token valid")
@@ -75,7 +75,7 @@ async def handler(request: ApiRequest, ctx: FlowContext) -> ApiResponse:
if not aktennummer:
ctx.logger.error("❌ Missing 'aktennummer' in payload")
return ApiResponse(status_code=400, body={"error": "Missing aktennummer"})
return ApiResponse(status=400, body={"error": "Missing aktennummer"})
ctx.logger.info(f"📂 Aktennummer: {aktennummer}")
ctx.logger.info(f"⏰ Timestamp: {timestamp}")
@@ -114,7 +114,7 @@ async def handler(request: ApiRequest, ctx: FlowContext) -> ApiResponse:
ctx.logger.info("=" * 80)
return ApiResponse(
status_code=200,
status=200,
body={
"success": True,
"aktennummer": aktennummer,
@@ -136,7 +136,7 @@ async def handler(request: ApiRequest, ctx: FlowContext) -> ApiResponse:
ctx.logger.error(traceback.format_exc())
return ApiResponse(
status_code=500,
status=500,
body={
"success": False,
"error": str(e),

View File

@@ -76,26 +76,28 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
sync_schalter = akte.get('syncSchalter', False)
aktivierungsstatus = str(akte.get('aktivierungsstatus') or '').lower()
ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
ai_provider = str(akte.get('aiProvider') or 'xAI')
ctx.logger.info(f"📋 Akte '{akte.get('name')}'")
ctx.logger.info(f" syncSchalter : {sync_schalter}")
ctx.logger.info(f" aktivierungsstatus : {aktivierungsstatus}")
ctx.logger.info(f" aiAktivierungsstatus : {ai_aktivierungsstatus}")
ctx.logger.info(f" aiProvider : {ai_provider}")
# Advoware sync requires an aktennummer (Akten without Advoware won't have one)
advoware_enabled = bool(aktennummer) and sync_schalter and aktivierungsstatus in VALID_ADVOWARE_STATUSES
xai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ctx.logger.info(f" Advoware sync : {'✅ ON' if advoware_enabled else '⏭️ OFF'}")
ctx.logger.info(f" xAI sync : {'✅ ON' if xai_enabled else '⏭️ OFF'}")
ctx.logger.info(f" AI sync ({ai_provider}) : {'✅ ON' if ai_enabled else '⏭️ OFF'}")
if not advoware_enabled and not xai_enabled:
if not advoware_enabled and not ai_enabled:
ctx.logger.info("⏭️ Both syncs disabled nothing to do")
return
# ── Load CDokumente once (shared by Advoware + xAI sync) ─────────────────
espo_docs: list = []
if advoware_enabled or xai_enabled:
if advoware_enabled or ai_enabled:
espo_docs = await espocrm.list_related_all('CAkten', akte_id, 'dokumentes')
# ── ADVOWARE SYNC ────────────────────────────────────────────
@@ -103,9 +105,12 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if advoware_enabled:
advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs)
# ── xAI SYNC ────────────────────────────────────────────────
if xai_enabled:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── AI SYNC (xAI or RAGflow) ─────────────────────────────────
if ai_enabled:
if ai_provider.lower() == 'ragflow':
await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
else:
await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
# ── Final Status ───────────────────────────────────────────────────
now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
@@ -117,10 +122,10 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if aktivierungsstatus == 'import':
final_update['aktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aktivierungsstatus: import → active")
if xai_enabled:
if ai_enabled:
final_update['aiSyncStatus'] = 'synced'
final_update['aiLastSync'] = now
# 'new' = Collection wurde gerade erstmalig angelegt → auf 'aktiv' setzen
# 'new' = Dataset/Collection erstmalig angelegt → auf 'aktiv' setzen
if ai_aktivierungsstatus == 'new':
final_update['aiAktivierungsstatus'] = 'active'
ctx.logger.info("🔄 aiAktivierungsstatus: new → active")
@@ -483,3 +488,216 @@ async def _run_xai_sync(
finally:
await xai.close()
# ─────────────────────────────────────────────────────────────────────────────
# RAGflow sync
# ─────────────────────────────────────────────────────────────────────────────
async def _run_ragflow_sync(
    akte: Dict[str, Any],
    akte_id: str,
    espocrm,
    ctx: FlowContext,
    docs: list,
) -> None:
    """Sync all CDokumente of one Akte into its RAGflow dataset.

    Mirrors the EspoCRM document list into RAGflow:
      * creates the dataset on first sync (aiAktivierungsstatus == 'new'),
      * deletes RAGflow documents whose EspoCRM counterpart is gone (orphans),
      * uploads new or content-changed documents (change detection via Blake3),
      * pushes metadata-only updates when just description/advoware fields changed,
      * writes the sync status back onto each CDokumente entity.

    Args:
        akte: CAkten entity dict (reads aiAktivierungsstatus / aiCollectionId).
        akte_id: EspoCRM id of the CAkten entity.
        espocrm: EspoCRM API client (update_entity / download_attachment).
        ctx: flow context providing the logger.
        docs: pre-fetched list of related CDokumente entity dicts.
    """
    # Local imports keep the RAGflow dependency out of the module import path
    # for installations that only use the xAI provider.
    from services.ragflow_service import RAGFlowService
    from urllib.parse import unquote
    import mimetypes

    ragflow = RAGFlowService(ctx)
    ctx.logger.info("")
    # FIX: source logged `"" * 60` (an empty string); restore the visible rule
    # using the same box-drawing character as the section banners in this file.
    ctx.logger.info("─" * 60)
    ctx.logger.info("🧠 RAGflow SYNC")
    ctx.logger.info("─" * 60)

    ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
    dataset_id = akte.get('aiCollectionId')

    # ── Ensure dataset exists ─────────────────────────────────────────────
    if not dataset_id:
        if ai_aktivierungsstatus == 'new':
            akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
            ctx.logger.info(f" Status 'new' → Erstelle neues RAGflow Dataset für '{akte_name}'...")
            dataset_info = await ragflow.ensure_dataset(akte_name)
            if not dataset_info or not dataset_info.get('id'):
                ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden Sync abgebrochen")
                await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                return
            dataset_id = dataset_info['id']
            ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}")
            await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
        else:
            # Any status other than 'new' must already carry a dataset id.
            ctx.logger.error(
                f"❌ aiAktivierungsstatus='{ai_aktivierungsstatus}' aber keine aiCollectionId "
                f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
            )
            await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
            return

    ctx.logger.info(f" Dataset-ID : {dataset_id}")
    ctx.logger.info(f" EspoCRM docs: {len(docs)}")

    # ── Fetch current RAGflow inventory (source of truth) ─────────────────
    # Lookup: espocrm_id → ragflow doc (only docs tagged with an espocrm_id).
    ragflow_by_espocrm_id: Dict[str, Any] = {}
    try:
        ragflow_docs = await ragflow.list_documents(dataset_id)
        ctx.logger.info(f" RAGflow docs: {len(ragflow_docs)}")
        for rd in ragflow_docs:
            eid = rd.get('espocrm_id')
            if eid:
                ragflow_by_espocrm_id[eid] = rd
    except Exception as e:
        ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
        await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
        return

    # ── Orphan cleanup: RAGflow docs without an EspoCRM counterpart ───────
    espocrm_ids_set = {d['id'] for d in docs}
    for rd in ragflow_docs:
        eid = rd.get('espocrm_id')
        if eid and eid not in espocrm_ids_set:
            try:
                await ragflow.remove_document(dataset_id, rd['id'])
                ctx.logger.info(f" 🗑️ Orphan gelöscht: {rd.get('name', rd['id'])} (espocrm_id={eid})")
            except Exception as e:
                ctx.logger.warn(f" ⚠️ Orphan-Delete fehlgeschlagen: {e}")

    synced = 0
    skipped = 0
    failed = 0
    for doc in docs:
        doc_id = doc['id']
        doc_name = doc.get('name', doc_id)
        blake3_hash = doc.get('blake3hash') or ''

        # What is currently in RAGflow for this document (if anything)?
        ragflow_doc = ragflow_by_espocrm_id.get(doc_id)
        ragflow_doc_id = ragflow_doc['id'] if ragflow_doc else None
        ragflow_blake3 = ragflow_doc.get('blake3_hash', '') if ragflow_doc else ''
        ragflow_meta = ragflow_doc.get('meta_fields', {}) if ragflow_doc else {}

        # Current metadata from EspoCRM.
        current_description = str(doc.get('beschreibung') or '')
        current_advo_art = str(doc.get('advowareArt') or '')
        current_advo_bemerk = str(doc.get('advowareBemerkung') or '')

        content_changed = blake3_hash != ragflow_blake3
        meta_changed = (
            ragflow_meta.get('description', '') != current_description or
            ragflow_meta.get('advoware_art', '') != current_advo_art or
            ragflow_meta.get('advoware_bemerkung', '') != current_advo_bemerk
        )

        ctx.logger.info(f" 📄 {doc_name}")
        ctx.logger.info(
            f" in_ragflow={bool(ragflow_doc_id)}, "
            f"content_changed={content_changed}, meta_changed={meta_changed}"
        )
        if ragflow_doc_id:
            ctx.logger.info(
                f" ragflow_blake3={ragflow_blake3[:12] if ragflow_blake3 else 'N/A'}..., "
                f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
            )

        if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash:
            # No attachment hash and never uploaded → nothing to sync.
            # NOTE(review): a hash-less doc whose metadata differs still falls
            # through to the upload branch below — confirm that is intended.
            ctx.logger.info(f" ⏭️ Kein Blake3-Hash übersprungen")
            skipped += 1
            continue

        attachment_id = doc.get('dokumentId')
        if not attachment_id:
            ctx.logger.warn(f" ⚠️ Kein Attachment (dokumentId fehlt) unsupported")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'unsupported',
                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            })
            skipped += 1
            continue

        filename = unquote(doc.get('dokumentName') or doc.get('name') or 'document.bin')
        mime_type, _ = mimetypes.guess_type(filename)
        if not mime_type:
            mime_type = 'application/octet-stream'

        try:
            if ragflow_doc_id and not content_changed and meta_changed:
                # ── Metadata-only update ──────────────────────────────────
                ctx.logger.info(f" 🔄 Metadata-Update für {ragflow_doc_id}")
                await ragflow.update_document_meta(
                    dataset_id, ragflow_doc_id,
                    blake3_hash=blake3_hash,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                new_ragflow_id = ragflow_doc_id
            elif ragflow_doc_id and not content_changed and not meta_changed:
                # ── Fully unchanged → skip re-upload ──────────────────────
                ctx.logger.info(f" ✅ Unverändert kein Re-Upload")
                # Keep the EspoCRM tracking fields current anyway.
                await espocrm.update_entity('CDokumente', doc_id, {
                    'aiFileId': ragflow_doc_id,
                    'aiCollectionId': dataset_id,
                    'aiSyncHash': blake3_hash,
                    'aiSyncStatus': 'synced',
                })
                skipped += 1
                continue
            else:
                # ── Upload (new document or content changed) ──────────────
                if ragflow_doc_id and content_changed:
                    # No in-place replace in RAGflow → delete, then re-upload.
                    ctx.logger.info(f" 🗑️ Inhalt geändert altes Dokument löschen: {ragflow_doc_id}")
                    try:
                        await ragflow.remove_document(dataset_id, ragflow_doc_id)
                    except Exception:
                        pass  # best effort; the upload proceeds regardless
                # FIX: these log lines printed the literal placeholder
                # "(unknown)" instead of the document/file name.
                ctx.logger.info(f" 📥 Downloading {doc_name} ({attachment_id})…")
                file_content = await espocrm.download_attachment(attachment_id)
                ctx.logger.info(f" Downloaded {len(file_content)} bytes")
                ctx.logger.info(f" 📤 Uploading '{filename}' ({mime_type})…")
                result = await ragflow.upload_document(
                    dataset_id=dataset_id,
                    file_content=file_content,
                    filename=filename,
                    mime_type=mime_type,
                    blake3_hash=blake3_hash,
                    espocrm_id=doc_id,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                if not result or not result.get('id'):
                    raise RuntimeError("upload_document gab kein Ergebnis zurück")
                new_ragflow_id = result['id']
                ctx.logger.info(f" ✅ RAGflow-ID: {new_ragflow_id}")

            now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiFileId': new_ragflow_id,
                'aiCollectionId': dataset_id,
                'aiSyncHash': blake3_hash,
                'aiSyncStatus': 'synced',
                'aiLastSync': now_str,
            })
            synced += 1
        except Exception as e:
            ctx.logger.error(f" ❌ Fehlgeschlagen: {e}")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'failed',
                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            })
            failed += 1

    ctx.logger.info(f" ✅ Synced : {synced}")
    ctx.logger.info(f" ⏭️ Skipped : {skipped}")
    ctx.logger.info(f" ❌ Failed : {failed}")

View File

@@ -31,7 +31,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
if not entity_ids:
ctx.logger.warn("⚠️ No entity IDs in payload")
return ApiResponse(status_code=400, body={"error": "No entity ID found in payload"})
return ApiResponse(status=400, body={"error": "No entity ID found in payload"})
for eid in entity_ids:
await ctx.enqueue({'topic': 'akte.sync', 'data': {'akte_id': eid, 'aktennummer': None}})
@@ -39,8 +39,8 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
ctx.logger.info(f"✅ Emitted akte.sync for {len(entity_ids)} ID(s): {entity_ids}")
ctx.logger.info("=" * 60)
return ApiResponse(status_code=200, body={"status": "received", "action": "create", "ids_count": len(entity_ids)})
return ApiResponse(status=200, body={"status": "received", "action": "create", "ids_count": len(entity_ids)})
except Exception as e:
ctx.logger.error(f"❌ Webhook error: {e}")
return ApiResponse(status_code=500, body={"error": str(e)})
return ApiResponse(status=500, body={"error": str(e)})

View File

@@ -31,8 +31,8 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
ctx.logger.info(" → Kein Sync (Entität gelöscht)")
ctx.logger.info("=" * 60)
return ApiResponse(status_code=200, body={"status": "received", "action": "delete", "ids_count": len(entity_ids)})
return ApiResponse(status=200, body={"status": "received", "action": "delete", "ids_count": len(entity_ids)})
except Exception as e:
ctx.logger.error(f"❌ Webhook error: {e}")
return ApiResponse(status_code=500, body={"error": str(e)})
return ApiResponse(status=500, body={"error": str(e)})

View File

@@ -31,7 +31,7 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
if not entity_ids:
ctx.logger.warn("⚠️ No entity IDs in payload")
return ApiResponse(status_code=400, body={"error": "No entity ID found in payload"})
return ApiResponse(status=400, body={"error": "No entity ID found in payload"})
for eid in entity_ids:
await ctx.enqueue({'topic': 'akte.sync', 'data': {'akte_id': eid, 'aktennummer': None}})
@@ -39,8 +39,8 @@ async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
ctx.logger.info(f"✅ Emitted akte.sync for {len(entity_ids)} ID(s): {entity_ids}")
ctx.logger.info("=" * 60)
return ApiResponse(status_code=200, body={"status": "received", "action": "update", "ids_count": len(entity_ids)})
return ApiResponse(status=200, body={"status": "received", "action": "update", "ids_count": len(entity_ids)})
except Exception as e:
ctx.logger.error(f"❌ Webhook error: {e}")
return ApiResponse(status_code=500, body={"error": str(e)})
return ApiResponse(status=500, body={"error": str(e)})

View File

@@ -1,89 +0,0 @@
"""
AI Knowledge Daily Full Sync (Cron)
Laueft taeglich um 02:00 Uhr.
Laedt alle CAIKnowledge-Entities mit activationStatus='active'
und syncStatus IN ('unclean', 'failed') und stellt sicher,
dass sie synchroisiert sind.
Emits aiknowledge.sync fuer jede betroffene Entity.
"""
from typing import Any
from motia import FlowContext, cron
from services.espocrm import EspoCRMAPI
from services.logging_utils import get_step_logger
# Motia step configuration: nightly full re-sync of stale knowledge bases.
config = {
    "name": "AI Knowledge Daily Cron",
    "description": "Taeglich: Vollsync aller unclean/failed CAIKnowledge Entities",
    "flows": ["vmh-aiknowledge"],
    "triggers": [
        cron("0 2 * * *"),  # daily at 02:00
    ],
    # Downstream: each matching entity gets one aiknowledge.sync event.
    "enqueues": ["aiknowledge.sync"],
}
async def handler(event_data: Any, ctx: FlowContext[Any]) -> None:
    """Nightly cron entry point.

    Looks up every active CAIKnowledge entity whose syncStatus is
    'unclean' or 'failed' and enqueues one aiknowledge.sync event per hit.
    """
    log = get_step_logger('aiknowledge_cron', ctx)
    banner = "=" * 70
    log.info(banner)
    log.info("⏰ AI KNOWLEDGE DAILY CRON START")
    log.info(banner)

    espocrm = EspoCRMAPI(ctx)

    # Active knowledge bases that still need a (re-)sync.
    filters = [
        {
            'type': 'equals',
            'attribute': 'activationStatus',
            'value': 'active',
        },
        {
            'type': 'in',
            'attribute': 'syncStatus',
            'value': ['unclean', 'failed'],
        },
    ]
    try:
        result = await espocrm.list_entities('CAIKnowledge', where=filters, max_size=200)
    except Exception as e:
        log.error(f"❌ EspoCRM-Abfrage fehlgeschlagen: {e}")
        return

    entities = result.get('list', [])
    total = result.get('total', len(entities))
    log.info(f"📋 {len(entities)}/{total} Entities brauchen Sync")

    enqueued = 0
    for entity in entities:
        knowledge_id = entity.get('id')
        if not knowledge_id:
            continue
        name = entity.get('name', knowledge_id)
        provider = entity.get('aiProvider', 'xai')
        sync_status = entity.get('syncStatus', '?')
        log.info(f" → Enqueue: {name} ({provider}, status={sync_status})")
        await ctx.enqueue({
            'topic': 'aiknowledge.sync',
            'data': {
                'knowledge_id': knowledge_id,
                'source': 'cron',
                'action': 'update',
            },
        })
        enqueued += 1

    log.info(f"{enqueued} Sync-Events enqueued")
    log.info(banner)

View File

@@ -1,64 +0,0 @@
"""
AI Knowledge Sync Handler
Verarbeitet aiknowledge.sync Events (Queue).
Quellen:
- Webhook: EspoCRM CAIKnowledge.afterSave
- Cron: Taeglich 02:00 Uhr (Vollsync)
Lifecycle:
new → Dataset/Collection erstellen (xAI oder RAGFlow)
active → Dokumente syncen (Change Detection via Blake3)
paused → Skip
deactivated → Dataset/Collection loeschen
"""
from typing import Any, Dict
from motia import FlowContext, queue
from services.espocrm import EspoCRMAPI
from services.redis_client import get_redis_client
from services.aiknowledge_sync_utils import AIKnowledgeSyncUtils
from services.logging_utils import get_step_logger
# Motia step configuration: queue consumer for aiknowledge.sync events.
config = {
    "name": "AI Knowledge Sync Handler",
    "description": "Synchronisiert CAIKnowledge Entities mit xAI oder RAGFlow",
    "flows": ["vmh-aiknowledge"],
    "triggers": [
        queue("aiknowledge.sync"),  # fed by the afterSave webhook and the daily cron
    ],
    "enqueues": [],  # terminal step: emits no further events
}
async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None:
    """Queue consumer for aiknowledge.sync events.

    Expected event_data keys:
        knowledge_id: EspoCRM CAIKnowledge id (required).
        source: 'webhook' | 'cron' (defaults to 'webhook').
        action: 'create' | 'update' (defaults to 'update').
    """
    log = get_step_logger('aiknowledge_sync', ctx)

    knowledge_id = event_data.get('knowledge_id')
    if not knowledge_id:
        log.error("❌ Kein knowledge_id im Event")
        return

    source = event_data.get('source', 'webhook')
    action = event_data.get('action', 'update')

    divider = "=" * 70
    log.info(divider)
    log.info(f"🔄 AI KNOWLEDGE SYNC EVENT")
    log.info(f" ID : {knowledge_id}")
    log.info(f" Source: {source} | Action: {action}")
    log.info(divider)

    # Delegate the lifecycle handling (new/active/paused/deactivated)
    # to the shared sync utility.
    espocrm = EspoCRMAPI(ctx)
    redis_client = get_redis_client(strict=False)
    sync = AIKnowledgeSyncUtils(espocrm, redis_client, ctx)
    await sync.run_sync(knowledge_id)