@@ -76,26 +76,28 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
sync_schalter = akte . get ( ' syncSchalter ' , False )
aktivierungsstatus = str ( akte . get ( ' aktivierungsstatus ' ) or ' ' ) . lower ( )
ai_aktivierungsstatus = str ( akte . get ( ' aiAktivierungsstatus ' ) or ' ' ) . lower ( )
ai_provider = str ( akte . get ( ' aiProvider ' ) or ' xAI ' )
ctx . logger . info ( f " 📋 Akte ' { akte . get ( ' name ' ) } ' " )
ctx . logger . info ( f " syncSchalter : { sync_schalter } " )
ctx . logger . info ( f " aktivierungsstatus : { aktivierungsstatus } " )
ctx . logger . info ( f " aiAktivierungsstatus : { ai_aktivierungsstatus } " )
ctx . logger . info ( f " aiProvider : { ai_provider } " )
# Advoware sync requires an aktennummer (Akten without Advoware won't have one)
advoware_enabled = bool ( aktennummer ) and sync_schalter and aktivierungsstatus in VALID_ADVOWARE_STATUSES
x ai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
ctx . logger . info ( f " Advoware sync : { ' ✅ ON ' if advoware_enabled else ' ⏭️ OFF ' } " )
ctx . logger . info ( f " x AI sync : { ' ✅ ON ' if x ai_enabled else ' ⏭️ OFF ' } " )
ctx . logger . info ( f " AI sync ( { ai_provider } ) : { ' ✅ ON ' if ai_enabled else ' ⏭️ OFF ' } " )
if not advoware_enabled and not x ai_enabled:
if not advoware_enabled and not ai_enabled :
ctx . logger . info ( " ⏭️ Both syncs disabled – nothing to do " )
return
# ── Load CDokumente once (shared by Advoware + xAI sync) ─────────────────
espo_docs : list = [ ]
if advoware_enabled or x ai_enabled:
if advoware_enabled or ai_enabled :
espo_docs = await espocrm . list_related_all ( ' CAkten ' , akte_id , ' dokumentes ' )
# ── ADVOWARE SYNC ────────────────────────────────────────────
@@ -103,9 +105,12 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if advoware_enabled :
advoware_results = await _run_advoware_sync ( akte , aktennummer , akte_id , espocrm , ctx , espo_docs )
# ── x AI SYNC ─────────────── ─────────────────────────────────
if x ai_enabled:
await _run_xai_sync ( akte , akte_id , espocrm , ctx , espo_docs )
# ── AI SYNC (xAI or RAGflow) ─────────────────────────────────
if ai_enabled :
if ai_provider . lower ( ) == ' ragflow ' :
await _run_ragflow_sync ( akte , akte_id , espocrm , ctx , espo_docs )
else :
await _run_xai_sync ( akte , akte_id , espocrm , ctx , espo_docs )
# ── Final Status ───────────────────────────────────────────────────
now = datetime . now ( ) . strftime ( ' % Y- % m- %d % H: % M: % S ' )
@@ -117,10 +122,10 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
if aktivierungsstatus == ' import ' :
final_update [ ' aktivierungsstatus ' ] = ' active '
ctx . logger . info ( " 🔄 aktivierungsstatus: import → active " )
if x ai_enabled:
if ai_enabled :
final_update [ ' aiSyncStatus ' ] = ' synced '
final_update [ ' aiLastSync ' ] = now
# 'new' = Collection wurde gerade erstmalig angelegt → auf 'aktiv' setzen
# 'new' = Dataset/ Collection erstmalig angelegt → auf 'aktiv' setzen
if ai_aktivierungsstatus == ' new ' :
final_update [ ' aiAktivierungsstatus ' ] = ' active '
ctx . logger . info ( " 🔄 aiAktivierungsstatus: new → active " )
@@ -483,3 +488,216 @@ async def _run_xai_sync(
finally :
await xai . close ( )
# ─────────────────────────────────────────────────────────────────────────────
# RAGflow sync
# ─────────────────────────────────────────────────────────────────────────────
async def _run_ragflow_sync(
    akte: Dict[str, Any],
    akte_id: str,
    espocrm,
    ctx: FlowContext,
    docs: list,
) -> None:
    """Synchronise the CDokumente of one Akte into a RAGflow dataset.

    Steps, in order:

    1. Resolve the dataset id from ``akte['aiCollectionId']``. If it is
       missing and ``aiAktivierungsstatus`` is ``'new'``, a dataset is
       created via ``ragflow.ensure_dataset`` and its id is written back to
       the CAkten entity. If it is missing in any other status, the sync is
       aborted.
    2. List the documents currently in the dataset and index them by their
       ``espocrm_id`` field.
    3. Remove RAGflow documents whose ``espocrm_id`` no longer appears in
       ``docs`` (best effort, failures are only logged).
    4. For every entry in ``docs`` decide between: skip, metadata-only
       update, or (re-)upload, and write the tracking fields
       (``aiFileId``, ``aiCollectionId``, ``aiSyncHash``, ``aiSyncStatus``,
       ``aiLastSync``) back to the CDokumente entity.

    Args:
        akte: The CAkten entity as a dict; ``aiAktivierungsstatus``,
            ``aiCollectionId``, ``name`` and ``aktennummer`` are read.
        akte_id: Id of the CAkten entity, used for status updates.
        espocrm: EspoCRM client; ``update_entity`` and
            ``download_attachment`` are awaited on it.
        ctx: Flow context; only ``ctx.logger`` is used here and ``ctx`` is
            passed to ``RAGFlowService``.
        docs: CDokumente dicts related to the Akte. Each must contain
            ``'id'``.

    Returns:
        None. When the dataset cannot be created/resolved or the RAGflow
        document list cannot be fetched, ``aiSyncStatus`` is set to
        ``'failed'`` on the CAkten entity and the function returns early.
        Per-document failures set ``aiSyncStatus='failed'`` on that
        CDokumente entity and processing continues with the next document.
    """
    from services.ragflow_service import RAGFlowService
    from urllib.parse import unquote
    import mimetypes

    # Single definition of the timestamp layout written to EspoCRM.
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    ragflow = RAGFlowService(ctx)

    ctx.logger.info("")
    ctx.logger.info("─" * 60)
    ctx.logger.info("🧠 RAGflow SYNC")
    ctx.logger.info("─" * 60)

    ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
    dataset_id = akte.get('aiCollectionId')

    # ── Ensure dataset exists ─────────────────────────────────────────────
    if not dataset_id:
        if ai_aktivierungsstatus == 'new':
            akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
            ctx.logger.info(f"Status 'new' → Erstelle neues RAGflow Dataset für '{akte_name}'...")
            dataset_info = await ragflow.ensure_dataset(akte_name)
            if not dataset_info or not dataset_info.get('id'):
                ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden – Sync abgebrochen")
                await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                return
            dataset_id = dataset_info['id']
            ctx.logger.info(f"✅ Dataset erstellt: {dataset_id}")
            # Persist the id immediately so later runs find the dataset.
            await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
        else:
            ctx.logger.error(
                f"❌ aiAktivierungsstatus='{ai_aktivierungsstatus}' aber keine aiCollectionId – "
                f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
            )
            await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
            return

    ctx.logger.info(f"Dataset-ID : {dataset_id}")
    ctx.logger.info(f"EspoCRM docs: {len(docs)}")

    # ── Fetch the RAGflow inventory (source of truth) ─────────────────────
    # Lookup: espocrm_id → ragflow_doc (only docs tagged with an espocrm_id).
    ragflow_by_espocrm_id: Dict[str, Any] = {}
    try:
        ragflow_docs = await ragflow.list_documents(dataset_id)
        ctx.logger.info(f"RAGflow docs: {len(ragflow_docs)}")
        for rd in ragflow_docs:
            eid = rd.get('espocrm_id')
            if eid:
                ragflow_by_espocrm_id[eid] = rd
    except Exception as e:
        ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
        await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
        return

    # ── Orphan cleanup: RAGflow docs without an EspoCRM counterpart ───────
    espocrm_ids_set = {d['id'] for d in docs}
    for rd in ragflow_docs:
        eid = rd.get('espocrm_id')
        if eid and eid not in espocrm_ids_set:
            try:
                await ragflow.remove_document(dataset_id, rd['id'])
                ctx.logger.info(f"🗑️ Orphan gelöscht: {rd.get('name', rd['id'])} (espocrm_id={eid})")
            except Exception as e:
                # Best effort: a stale orphan must not abort the whole sync.
                ctx.logger.warn(f"⚠️ Orphan-Delete fehlgeschlagen: {e}")

    synced = 0
    skipped = 0
    failed = 0

    for doc in docs:
        doc_id = doc['id']
        doc_name = doc.get('name', doc_id)
        blake3_hash = doc.get('blake3hash') or ''

        # What does RAGflow currently hold for this document?
        ragflow_doc = ragflow_by_espocrm_id.get(doc_id)
        ragflow_doc_id = ragflow_doc['id'] if ragflow_doc else None
        ragflow_blake3 = ragflow_doc.get('blake3_hash', '') if ragflow_doc else ''
        ragflow_meta = ragflow_doc.get('meta_fields', {}) if ragflow_doc else {}

        # Current metadata from EspoCRM.
        current_description = str(doc.get('beschreibung') or '')
        current_advo_art = str(doc.get('advowareArt') or '')
        current_advo_bemerk = str(doc.get('advowareBemerkung') or '')

        content_changed = blake3_hash != ragflow_blake3
        meta_changed = (
            ragflow_meta.get('description', '') != current_description or
            ragflow_meta.get('advoware_art', '') != current_advo_art or
            ragflow_meta.get('advoware_bemerkung', '') != current_advo_bemerk
        )

        ctx.logger.info(f"📄 {doc_name}")
        ctx.logger.info(
            f"in_ragflow={bool(ragflow_doc_id)}, "
            f"content_changed={content_changed}, meta_changed={meta_changed}"
        )
        if ragflow_doc_id:
            ctx.logger.info(
                f"ragflow_blake3={ragflow_blake3[:12] if ragflow_blake3 else 'N/A'}..., "
                f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
            )

        if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash:
            # No attachment hash, no metadata and never uploaded → nothing to sync.
            ctx.logger.info(f"⏭️ Kein Blake3-Hash – übersprungen")
            skipped += 1
            continue

        attachment_id = doc.get('dokumentId')
        if not attachment_id:
            ctx.logger.warn(f"⚠️ Kein Attachment (dokumentId fehlt) – unsupported")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'unsupported',
                'aiLastSync': datetime.now().strftime(timestamp_format),
            })
            skipped += 1
            continue

        # File names may arrive URL-encoded; decode before guessing the MIME type.
        filename = unquote(doc.get('dokumentName') or doc.get('name') or 'document.bin')
        mime_type, _ = mimetypes.guess_type(filename)
        if not mime_type:
            mime_type = 'application/octet-stream'

        try:
            if ragflow_doc_id and not content_changed and meta_changed:
                # ── Metadata-only update ──────────────────────────────────
                ctx.logger.info(f"🔄 Metadata-Update für {ragflow_doc_id}…")
                await ragflow.update_document_meta(
                    dataset_id, ragflow_doc_id,
                    blake3_hash=blake3_hash,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                new_ragflow_id = ragflow_doc_id

            elif ragflow_doc_id and not content_changed and not meta_changed:
                # ── Completely unchanged → skip ───────────────────────────
                ctx.logger.info(f"✅ Unverändert – kein Re-Upload")
                # Keep the tracking fields in EspoCRM up to date.
                await espocrm.update_entity('CDokumente', doc_id, {
                    'aiFileId': ragflow_doc_id,
                    'aiCollectionId': dataset_id,
                    'aiSyncHash': blake3_hash,
                    'aiSyncStatus': 'synced',
                })
                skipped += 1
                continue

            else:
                # ── Upload (new document or changed content) ──────────────
                # Download first: if EspoCRM cannot deliver the attachment,
                # the previously indexed copy must stay in RAGflow instead of
                # being deleted with nothing to replace it.
                ctx.logger.info(f"📥 Downloading {filename} ({attachment_id})…")
                file_content = await espocrm.download_attachment(attachment_id)
                ctx.logger.info(f"Downloaded {len(file_content)} bytes")

                if ragflow_doc_id and content_changed:
                    ctx.logger.info(f"🗑️ Inhalt geändert – altes Dokument löschen: {ragflow_doc_id}")
                    try:
                        await ragflow.remove_document(dataset_id, ragflow_doc_id)
                    except Exception as e:
                        # Still best effort, but no longer silent: a failed
                        # delete can leave two docs with the same espocrm_id.
                        ctx.logger.warn(f"⚠️ Altes Dokument konnte nicht gelöscht werden: {e}")

                ctx.logger.info(f"📤 Uploading '{filename}' ({mime_type})…")
                result = await ragflow.upload_document(
                    dataset_id=dataset_id,
                    file_content=file_content,
                    filename=filename,
                    mime_type=mime_type,
                    blake3_hash=blake3_hash,
                    espocrm_id=doc_id,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                if not result or not result.get('id'):
                    raise RuntimeError("upload_document gab kein Ergebnis zurück")
                new_ragflow_id = result['id']
                ctx.logger.info(f"✅ RAGflow-ID: {new_ragflow_id}")

            # Shared tail for metadata update and upload: record the result.
            now_str = datetime.now().strftime(timestamp_format)
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiFileId': new_ragflow_id,
                'aiCollectionId': dataset_id,
                'aiSyncHash': blake3_hash,
                'aiSyncStatus': 'synced',
                'aiLastSync': now_str,
            })
            synced += 1

        except Exception as e:
            # Per-document boundary: mark this document failed and continue.
            ctx.logger.error(f"❌ Fehlgeschlagen: {e}")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'failed',
                'aiLastSync': datetime.now().strftime(timestamp_format),
            })
            failed += 1

    ctx.logger.info(f"✅ Synced : {synced}")
    ctx.logger.info(f"⏭️ Skipped : {skipped}")
    ctx.logger.info(f"❌ Failed : {failed}")