Files
motia-iii/src/steps/crm/akte/akte_sync_event_step.py

704 lines
32 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Akte Sync - Event Handler
Unified sync for one CAkten entity across all configured backends:
- Advoware (3-way merge: Windows ↔ EspoCRM ↔ History)
- xAI (Blake3 hash-based upload to Collection)
Both run in the same event to keep CDokumente perfectly in sync.
Trigger: akte.sync { akte_id, aktennummer }
Lock: Redis per-Akte (10 min TTL via ex=600, prevents double-sync of same Akte)
Parallel: Different Akten sync simultaneously.
Enqueues:
- document.generate_preview (after CREATE / UPDATE_ESPO)
"""
from typing import Dict, Any
from datetime import datetime
from motia import FlowContext, queue
# Step configuration: this step is triggered by the "akte.sync" queue topic
# and declares "document.generate_preview" as the topic it enqueues.
config = {
    "name": "Akte Sync - Event Handler",
    "description": "Unified sync for one Akte: Advoware 3-way merge + xAI upload",
    "flows": ["akte-sync"],
    "triggers": [queue("akte.sync")],
    "enqueues": ["document.generate_preview"],
}
# Lower-cased `aktivierungsstatus` values for which the Advoware sync is run
# (the handler additionally requires an aktennummer and `syncSchalter`).
VALID_ADVOWARE_STATUSES = frozenset({'import', 'new', 'active'})
# Lower-cased `aiAktivierungsstatus` values for which the AI sync is run.
VALID_AI_STATUSES = frozenset({'new', 'active'})
# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────
async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
    """Synchronise one CAkten entity with Advoware and/or the AI backend.

    Steps:
      1. Acquire a per-Akte Redis lock (600 s TTL). If the lock is busy a
         ``RuntimeError`` is raised so the event is retried later.
      2. Load the Akte from EspoCRM and decide which syncs are enabled.
      3. Load the related CDokumente once and run the enabled syncs.
      4. Write the final sync status fields back to the Akte.
      5. Release the lock, but only if this run still owns it.

    Args:
        event_data: Event payload; reads ``akte_id`` and optional
            ``aktennummer``.
        ctx: Motia flow context (used for logging; passed on to services).

    Raises:
        RuntimeError: If another run currently holds the lock for this Akte.
        Exception: Any error raised during the sync is re-raised after the
            Akte has been marked as failed.
    """
    import time
    import traceback
    import uuid

    akte_id = event_data.get('akte_id')
    aktennummer = event_data.get('aktennummer')
    ctx.logger.info("=" * 80)
    ctx.logger.info("🔄 AKTE SYNC STARTED")
    ctx.logger.info(f" Aktennummer : {aktennummer}")
    ctx.logger.info(f" EspoCRM ID : {akte_id}")
    ctx.logger.info("=" * 80)

    # Without an id there is nothing to lock or load; bail out instead of
    # locking "akte_sync:None" and failing later.
    if not akte_id:
        ctx.logger.error("❌ akte.sync event without akte_id - nothing to sync")
        return

    from services.redis_client import get_redis_client
    from services.espocrm import EspoCRMAPI

    redis_client = get_redis_client(strict=False)
    if not redis_client:
        ctx.logger.error("❌ Redis unavailable")
        return

    lock_key = f"akte_sync:{akte_id}"
    # The lock value doubles as an ownership token: timestamp (for humans)
    # plus a random suffix that is unique to this run.
    lock_token = f"{datetime.now().isoformat()}|{uuid.uuid4().hex}"
    lock_acquired = redis_client.set(lock_key, lock_token, nx=True, ex=600)
    if not lock_acquired:
        ctx.logger.warn(f"⏸️ Lock busy for Akte {akte_id}  requeueing")
        raise RuntimeError(f"Lock busy for akte_id={akte_id}")

    espocrm = EspoCRMAPI(ctx)
    try:
        # ── Load Akte ──────────────────────────────────────────────────────
        akte = await espocrm.get_entity('CAkten', akte_id)
        if not akte:
            ctx.logger.error(f"❌ Akte {akte_id} not found in EspoCRM")
            return

        # aktennummer can come from the event payload OR from the entity
        # (Akten without Advoware have no aktennummer)
        if not aktennummer:
            aktennummer = akte.get('aktennummer')

        sync_schalter = akte.get('syncSchalter', False)
        aktivierungsstatus = str(akte.get('aktivierungsstatus') or '').lower()
        ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
        ai_provider = str(akte.get('aiProvider') or 'xAI')
        ctx.logger.info(f"📋 Akte '{akte.get('name')}'")
        ctx.logger.info(f" syncSchalter : {sync_schalter}")
        ctx.logger.info(f" aktivierungsstatus : {aktivierungsstatus}")
        ctx.logger.info(f" aiAktivierungsstatus : {ai_aktivierungsstatus}")
        ctx.logger.info(f" aiProvider : {ai_provider}")

        # Advoware sync requires an aktennummer (Akten without Advoware won't have one)
        advoware_enabled = bool(aktennummer) and sync_schalter and aktivierungsstatus in VALID_ADVOWARE_STATUSES
        ai_enabled = ai_aktivierungsstatus in VALID_AI_STATUSES
        ctx.logger.info(f" Advoware sync : {'✅ ON' if advoware_enabled else '⏭️ OFF'}")
        ctx.logger.info(f" AI sync ({ai_provider}) : {'✅ ON' if ai_enabled else '⏭️ OFF'}")
        if not advoware_enabled and not ai_enabled:
            ctx.logger.info("⏭️ Both syncs disabled  nothing to do")
            return

        # ── Load CDokumente once (shared by Advoware + AI sync) ────────────
        # At least one sync is enabled at this point (see early return above).
        espo_docs: list = await espocrm.list_related_all('CAkten', akte_id, 'dokumentes')

        # ── ADVOWARE SYNC ──────────────────────────────────────────────────
        advoware_results = None
        if advoware_enabled:
            advoware_results = await _run_advoware_sync(akte, aktennummer, akte_id, espocrm, ctx, espo_docs)

        # ── AI SYNC (xAI or RAGflow) ───────────────────────────────────────
        # A helper that explicitly returns False has aborted (and has already
        # written aiSyncStatus='failed'); any other result, including None,
        # is treated as success.
        ai_aborted = False
        if ai_enabled:
            if ai_provider.lower() == 'ragflow':
                ai_result = await _run_ragflow_sync(akte, akte_id, espocrm, ctx, espo_docs)
            else:
                ai_result = await _run_xai_sync(akte, akte_id, espocrm, ctx, espo_docs)
            ai_aborted = ai_result is False

        # ── Final Status ───────────────────────────────────────────────────
        now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        final_update: Dict[str, Any] = {
            'globalLastSync': now,
            'globalSyncStatus': 'failed' if ai_aborted else 'synced',
        }
        if advoware_enabled:
            final_update['syncStatus'] = 'synced'
            final_update['lastSync'] = now
            # 'import' = first sync → switch to 'active' afterwards.
            # NOTE(review): _run_advoware_sync may set aktivierungsstatus to
            # 'inactive' (ablage) during this run; for an 'import' Akte this
            # write puts it back to 'active' - confirm that is intended.
            if aktivierungsstatus == 'import':
                final_update['aktivierungsstatus'] = 'active'
                ctx.logger.info("🔄 aktivierungsstatus: import → active")
        if ai_enabled and not ai_aborted:
            final_update['aiSyncStatus'] = 'synced'
            final_update['aiLastSync'] = now
            # 'new' = dataset/collection was created for the first time → set 'active'
            if ai_aktivierungsstatus == 'new':
                final_update['aiAktivierungsstatus'] = 'active'
                ctx.logger.info("🔄 aiAktivierungsstatus: new → active")
        await espocrm.update_entity('CAkten', akte_id, final_update)

        # Clean up processing set (Advoware Watcher queue)
        if aktennummer:
            redis_client.srem("advoware:processing_aktennummern", aktennummer)

        ctx.logger.info("=" * 80)
        ctx.logger.info("✅ AKTE SYNC COMPLETE")
        if advoware_results:
            ctx.logger.info(f" Advoware: created={advoware_results['created']} updated={advoware_results['updated']} deleted={advoware_results['deleted']} errors={advoware_results['errors']}")
        if ai_aborted:
            ctx.logger.warn(f" AI sync ({ai_provider}) aborted - aiSyncStatus left as set by the AI sync")
        ctx.logger.info("=" * 80)
    except Exception as e:
        ctx.logger.error(f"❌ Sync failed: {e}")
        ctx.logger.error(traceback.format_exc())
        # Requeue Advoware aktennummer for retry (Motia retries the akte.sync event itself).
        # Guarded so that a Redis hiccup here cannot replace the original error.
        if aktennummer:
            try:
                redis_client.zadd("advoware:pending_aktennummern", {aktennummer: time.time()})
            except Exception as requeue_error:
                ctx.logger.warn(f"⚠️ Could not requeue aktennummer {aktennummer}: {requeue_error}")
        try:
            await espocrm.update_entity('CAkten', akte_id, {
                'syncStatus': 'failed',
                'globalSyncStatus': 'failed',
            })
        except Exception as status_error:
            # Best effort only: the original error is re-raised below.
            ctx.logger.warn(f"⚠️ Could not mark Akte {akte_id} as failed: {status_error}")
        raise
    finally:
        # Release the lock only if it still carries this run's token. If the
        # TTL expired and another run re-acquired the key, deleting it would
        # remove that run's lock. (get + delete is not atomic, but it closes
        # the common stale-lock case.)
        try:
            current_value = redis_client.get(lock_key)
            if isinstance(current_value, bytes):
                current_value = current_value.decode('utf-8', errors='replace')
            if current_value == lock_token:
                redis_client.delete(lock_key)
                ctx.logger.info(f"🔓 Lock released for Akte {akte_id}")
            else:
                ctx.logger.warn(f"⚠️ Lock for Akte {akte_id} is no longer owned by this run - not deleting it")
        except Exception as release_error:
            ctx.logger.warn(f"⚠️ Lock release failed for Akte {akte_id}: {release_error}")
# ─────────────────────────────────────────────────────────────────────────────
# Advoware 3-way merge
# ─────────────────────────────────────────────────────────────────────────────
async def _run_advoware_sync(
    akte: Dict[str, Any],
    aktennummer: str,
    akte_id: str,
    espocrm,
    ctx: FlowContext,
    espo_docs: list,
) -> Dict[str, int]:
    """Run the Advoware document merge for one Akte.

    Fetches the Windows file list and the Advoware History for
    ``aktennummer``, indexes EspoCRM documents and History entries by HNR,
    and asks ``AdvowareDocumentSyncUtils.merge_three_way`` for an action
    per HNR (``SKIP``, ``CREATE``, ``UPDATE_ESPO`` or ``DELETE``).
    Afterwards the Akte's ``ablage`` flag and ``rubrum`` are synced.

    Args:
        akte: The CAkten entity as loaded from EspoCRM (reads ``rubrum``).
        aktennummer: Advoware file number used for all Advoware lookups.
        akte_id: EspoCRM id of the Akte.
        espocrm: EspoCRM API client.
        ctx: Motia flow context (logging, enqueueing preview jobs).
        espo_docs: CDokumente already linked to the Akte.

    Returns:
        Counters with the keys ``created``, ``updated``, ``deleted``,
        ``skipped`` and ``errors``.
    """
    from services.advoware_watcher_service import AdvowareWatcherService
    from services.advoware_history_service import AdvowareHistoryService
    from services.advoware_service import AdvowareService
    from services.advoware_document_sync_utils import AdvowareDocumentSyncUtils
    from services.blake3_utils import compute_blake3
    import mimetypes

    watcher = AdvowareWatcherService(ctx)
    history_service = AdvowareHistoryService(ctx)
    advoware_service = AdvowareService(ctx)
    sync_utils = AdvowareDocumentSyncUtils(ctx)
    results = {'created': 0, 'updated': 0, 'deleted': 0, 'skipped': 0, 'errors': 0}

    ctx.logger.info("")
    ctx.logger.info("" * 60)
    ctx.logger.info("📂 ADVOWARE SYNC")
    ctx.logger.info("" * 60)

    # ── Fetch Windows files + Advoware History ───────────────────────────
    try:
        windows_files = await watcher.get_akte_files(aktennummer)
    except Exception as e:
        ctx.logger.error(f"❌ Windows watcher failed: {e}")
        windows_files = []

    # Remember whether the History could be loaded at all: an empty list
    # caused by a failed request must not be read as "nothing exists in
    # Advoware", otherwise existing documents would be deleted below.
    history_available = True
    try:
        advo_history = await history_service.get_akte_history(aktennummer)
    except Exception as e:
        ctx.logger.error(f"❌ Advoware history failed: {e}")
        advo_history = []
        history_available = False

    ctx.logger.info(f" EspoCRM docs : {len(espo_docs)}")
    ctx.logger.info(f" Windows files : {len(windows_files)}")
    ctx.logger.info(f" History entries: {len(advo_history)}")

    # ── Cleanup Windows list (only files in History) ───────────────────
    windows_files = sync_utils.cleanup_file_list(windows_files, advo_history)

    # ── Build indexes by HNR (stable identifier from Advoware) ────────
    espo_by_hnr = {}
    for doc in espo_docs:
        if doc.get('hnr'):
            espo_by_hnr[doc['hnr']] = doc
    history_by_hnr = {}
    for entry in advo_history:
        if entry.get('hNr'):
            history_by_hnr[entry['hNr']] = entry
    # `or ''` also covers an explicit None path, which would break .lower().
    windows_by_path = {(f.get('path') or '').lower(): f for f in windows_files}
    all_hnrs = set(espo_by_hnr.keys()) | set(history_by_hnr.keys())
    ctx.logger.info(f" Unique HNRs : {len(all_hnrs)}")
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # ── 3-way merge per HNR ───────────────────────────────────────────
    for hnr in all_hnrs:
        espo_doc = espo_by_hnr.get(hnr)
        history_entry = history_by_hnr.get(hnr)

        # The History entry's `datei` path links the HNR to a Windows file.
        windows_file = None
        if history_entry and history_entry.get('datei'):
            windows_file = windows_by_path.get(history_entry['datei'].lower())

        # Display/file name: last path component of `datei`, else the
        # EspoCRM document name, else a synthetic name.
        if history_entry and history_entry.get('datei'):
            filename = history_entry['datei'].split('\\')[-1]
        elif espo_doc:
            filename = espo_doc.get('name', f'hnr_{hnr}')
        else:
            filename = f'hnr_{hnr}'

        try:
            action = sync_utils.merge_three_way(espo_doc, windows_file, history_entry)
            ctx.logger.info(f" [{action.action:12s}] {filename} (hnr={hnr})  {action.reason}")

            if action.action == 'SKIP':
                results['skipped'] += 1

            elif action.action == 'CREATE':
                if not windows_file:
                    ctx.logger.error(f" ❌ CREATE: no Windows file for hnr {hnr}")
                    results['errors'] += 1
                    continue
                content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
                blake3_hash = compute_blake3(content)
                mime_type, _ = mimetypes.guess_type(filename)
                mime_type = mime_type or 'application/octet-stream'
                attachment = await espocrm.upload_attachment_for_file_field(
                    file_content=content,
                    filename=filename,
                    related_type='CDokumente',
                    field='dokument',
                    mime_type=mime_type,
                )
                new_doc = await espocrm.create_entity('CDokumente', {
                    'name': filename,
                    'dokumentId': attachment.get('id'),
                    'hnr': history_entry.get('hNr') if history_entry else None,
                    'advowareArt': (history_entry.get('art', 'Schreiben') or 'Schreiben')[:100] if history_entry else 'Schreiben',
                    'advowareBemerkung': (history_entry.get('text', '') or '')[:255] if history_entry else '',
                    'dateipfad': windows_file.get('path', ''),
                    'blake3hash': blake3_hash,
                    'syncedHash': blake3_hash,
                    'usn': windows_file.get('usn', 0),
                    'syncStatus': 'synced',
                    'lastSyncTimestamp': now,
                    'cAktenId': akte_id,  # Direct FK to CAkten
                })
                doc_id = new_doc.get('id')
                # Link to Akte
                await espocrm.link_entities('CAkten', akte_id, 'dokumentes', doc_id)
                results['created'] += 1
                # Trigger preview (best effort: a failed enqueue does not fail the document)
                try:
                    await ctx.enqueue({'topic': 'document.generate_preview', 'data': {
                        'entity_id': doc_id,
                        'entity_type': 'CDokumente',
                    }})
                except Exception as e:
                    ctx.logger.warn(f" ⚠️ Preview trigger failed: {e}")

            elif action.action == 'UPDATE_ESPO':
                if not windows_file:
                    ctx.logger.error(f" ❌ UPDATE_ESPO: no Windows file for hnr {hnr}")
                    results['errors'] += 1
                    continue
                content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
                blake3_hash = compute_blake3(content)
                mime_type, _ = mimetypes.guess_type(filename)
                mime_type = mime_type or 'application/octet-stream'
                update_data: Dict[str, Any] = {
                    'name': filename,
                    'blake3hash': blake3_hash,
                    'syncedHash': blake3_hash,
                    'usn': windows_file.get('usn', 0),
                    'dateipfad': windows_file.get('path', ''),
                    'syncStatus': 'synced',
                    'lastSyncTimestamp': now,
                }
                if history_entry:
                    update_data['hnr'] = history_entry.get('hNr')
                    update_data['advowareArt'] = (history_entry.get('art', 'Schreiben') or 'Schreiben')[:100]
                    update_data['advowareBemerkung'] = (history_entry.get('text', '') or '')[:255]
                # Mark for re-sync to xAI only if file content actually changed
                # (USN can change without content change, e.g. metadata-only updates)
                content_changed = blake3_hash != espo_doc.get('syncedHash', '')
                if content_changed and espo_doc.get('aiSyncStatus') == 'synced':
                    update_data['aiSyncStatus'] = 'unclean'
                await espocrm.update_entity('CDokumente', espo_doc['id'], update_data)
                results['updated'] += 1
                try:
                    await ctx.enqueue({'topic': 'document.generate_preview', 'data': {
                        'entity_id': espo_doc['id'],
                        'entity_type': 'CDokumente',
                    }})
                except Exception as e:
                    ctx.logger.warn(f" ⚠️ Preview trigger failed: {e}")

            elif action.action == 'DELETE':
                if espo_doc:
                    if not history_available:
                        # History could not be loaded, so "HNR not in History"
                        # proves nothing. Never delete on that basis.
                        ctx.logger.warn(f" ⚠️ SKIP DELETE hnr={hnr}: Advoware History unavailable, cannot confirm deletion")
                        results['skipped'] += 1
                    # Only delete if the HNR is genuinely absent from Advoware History
                    # (not just absent from Windows - avoids deleting docs whose file
                    # is temporarily unavailable on the Windows share)
                    elif hnr in history_by_hnr:
                        ctx.logger.warn(f" ⚠️ SKIP DELETE hnr={hnr}: still in Advoware History, only missing from Windows")
                        results['skipped'] += 1
                    else:
                        await espocrm.delete_entity('CDokumente', espo_doc['id'])
                        results['deleted'] += 1
        except Exception as e:
            # One broken document must not stop the remaining HNRs.
            ctx.logger.error(f" ❌ Error for hnr {hnr} ({filename}): {e}")
            results['errors'] += 1

    # ── Ablage check + Rubrum sync ─────────────────────────────────────
    try:
        akte_details = await advoware_service.get_akte(aktennummer)
        if akte_details:
            espo_update: Dict[str, Any] = {}
            if akte_details.get('ablage') == 1:
                ctx.logger.info("📁 Akte marked as ablage → deactivating")
                espo_update['aktivierungsstatus'] = 'inactive'
            rubrum = akte_details.get('rubrum')
            if rubrum and rubrum != akte.get('rubrum'):
                espo_update['rubrum'] = rubrum
                ctx.logger.info(f"📝 Rubrum synced: {rubrum[:80]}")
            if espo_update:
                await espocrm.update_entity('CAkten', akte_id, espo_update)
    except Exception as e:
        ctx.logger.warn(f"⚠️ Ablage/Rubrum check failed: {e}")

    return results
# ─────────────────────────────────────────────────────────────────────────────
# xAI sync
# ─────────────────────────────────────────────────────────────────────────────
async def _run_xai_sync(
    akte: Dict[str, Any],
    akte_id: str,
    espocrm,
    ctx: FlowContext,
    docs: list,
) -> bool:
    """Sync the documents of one Akte into its xAI collection.

    Resolves (or, for status ``new``, creates) the collection, removes
    xAI documents that have no matching EspoCRM document, then hands each
    document to ``XAIUploadUtils.sync_document_to_xai``.

    Args:
        akte: The CAkten entity (reads ``aiAktivierungsstatus`` and
            ``aiCollectionId``).
        akte_id: EspoCRM id of the Akte.
        espocrm: EspoCRM API client.
        ctx: Motia flow context used for logging.
        docs: CDokumente linked to the Akte.

    Returns:
        ``False`` when the sync was aborted before any document was
        processed (no usable collection; ``aiSyncStatus`` has been set to
        ``'failed'`` on the Akte). ``True`` otherwise, regardless of
        per-document failures, which are only counted and logged.
    """
    from services.xai_service import XAIService
    from services.xai_upload_utils import XAIUploadUtils

    xai = XAIService(ctx)
    upload_utils = XAIUploadUtils(ctx)
    ctx.logger.info("")
    ctx.logger.info("" * 60)
    ctx.logger.info("🤖 xAI SYNC")
    ctx.logger.info("" * 60)
    try:
        # ── Determine the collection id ────────────────────────────────
        ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
        collection_id = akte.get('aiCollectionId')
        if not collection_id:
            if ai_aktivierungsstatus == 'new':
                # Status 'new' → create a new collection
                ctx.logger.info(" Status 'new' → Erstelle neue xAI Collection...")
                collection_id = await upload_utils.ensure_collection(akte, xai, espocrm)
                if not collection_id:
                    ctx.logger.error("❌ xAI Collection konnte nicht erstellt werden  Sync abgebrochen")
                    await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                    return False
                ctx.logger.info(f" ✅ Collection erstellt: {collection_id}")
                # aiAktivierungsstatus → 'active' is set by the handler's final update
            else:
                # Active (or any other status) but no collection id → configuration error
                ctx.logger.error(
                    f"❌ aiAktivierungsstatus='{ai_aktivierungsstatus}' aber keine aiCollectionId vorhanden  "
                    f"xAI Sync abgebrochen. Bitte Collection-ID in EspoCRM eintragen."
                )
                await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                return False
        else:
            # Collection id present → verify that it still exists in xAI
            try:
                col = await xai.get_collection(collection_id)
                if not col:
                    ctx.logger.error(f"❌ Collection {collection_id} existiert nicht mehr in xAI  Sync abgebrochen")
                    await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                    return False
                ctx.logger.info(f" ✅ Collection verifiziert: {collection_id}")
            except Exception as e:
                ctx.logger.error(f"❌ Collection-Verifizierung fehlgeschlagen: {e}  Sync abgebrochen")
                await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                return False

        ctx.logger.info(f" Documents to check: {len(docs)}")

        # ── Orphan cleanup: delete xAI docs without an EspoCRM counterpart ──
        known_xai_file_ids = {doc.get('aiFileId') for doc in docs if doc.get('aiFileId')}
        try:
            xai_docs = await xai.list_collection_documents(collection_id)
            orphans = [d for d in xai_docs if d.get('file_id') not in known_xai_file_ids]
            if orphans:
                ctx.logger.info(f" 🗑️ Orphan-Cleanup: {len(orphans)} Doc(s) in xAI ohne EspoCRM-Eintrag")
                for orphan in orphans:
                    try:
                        await xai.remove_from_collection(collection_id, orphan['file_id'])
                        ctx.logger.info(f" Gelöscht: {orphan.get('filename', orphan['file_id'])}")
                    except Exception as e:
                        ctx.logger.warn(f" Orphan-Delete fehlgeschlagen: {e}")
        except Exception as e:
            # Cleanup is optional; the upload phase still runs.
            ctx.logger.warn(f" ⚠️ Orphan-Cleanup fehlgeschlagen (non-fatal): {e}")

        synced = 0
        skipped = 0
        failed = 0
        for doc in docs:
            # Determine skip condition based on pre-sync state (avoids stale-dict stats bug)
            will_skip = (
                doc.get('aiSyncStatus') == 'synced'
                and doc.get('aiSyncHash')
                and doc.get('blake3hash')
                and doc.get('aiSyncHash') == doc.get('blake3hash')
            )
            ok = await upload_utils.sync_document_to_xai(doc, collection_id, xai, espocrm)
            if ok:
                if will_skip:
                    skipped += 1
                else:
                    synced += 1
            else:
                failed += 1
        ctx.logger.info(f" ✅ Synced : {synced}")
        ctx.logger.info(f" ⏭️ Skipped : {skipped}")
        ctx.logger.info(f" ❌ Failed : {failed}")
        return True
    finally:
        # Always close the xAI client, including on the early returns above.
        await xai.close()
# ─────────────────────────────────────────────────────────────────────────────
# RAGflow sync
# ─────────────────────────────────────────────────────────────────────────────
async def _run_ragflow_sync(
    akte: Dict[str, Any],
    akte_id: str,
    espocrm,
    ctx: FlowContext,
    docs: list,
) -> bool:
    """Sync the documents of one Akte into its RAGflow dataset.

    Resolves (or, for status ``new``, creates) the dataset, lists the
    documents currently stored in RAGflow, removes those whose
    ``espocrm_id`` no longer exists in ``docs``, and then for each
    EspoCRM document either skips it, updates only its metadata, or
    (re-)uploads it, based on the Blake3 hash and metadata comparison.

    Args:
        akte: The CAkten entity (reads ``aiAktivierungsstatus``,
            ``aiCollectionId``, ``name``, ``aktennummer``).
        akte_id: EspoCRM id of the Akte.
        espocrm: EspoCRM API client.
        ctx: Motia flow context used for logging.
        docs: CDokumente linked to the Akte.

    Returns:
        ``False`` when the sync was aborted before any document was
        processed (no usable dataset, or the RAGflow document list could
        not be fetched; ``aiSyncStatus`` has been set to ``'failed'`` on
        the Akte). ``True`` otherwise, regardless of per-document
        failures, which are recorded on the individual CDokumente.
    """
    from services.ragflow_service import RAGFlowService
    from urllib.parse import unquote
    import mimetypes

    ragflow = RAGFlowService(ctx)
    ctx.logger.info("")
    ctx.logger.info("" * 60)
    ctx.logger.info("🧠 RAGflow SYNC")
    ctx.logger.info("" * 60)
    ai_aktivierungsstatus = str(akte.get('aiAktivierungsstatus') or '').lower()
    dataset_id = akte.get('aiCollectionId')

    # ── Ensure dataset exists ─────────────────────────────────────────────
    if not dataset_id:
        if ai_aktivierungsstatus == 'new':
            akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
            ctx.logger.info(f" Status 'new' → Erstelle neues RAGflow Dataset für '{akte_name}'...")
            dataset_info = await ragflow.ensure_dataset(akte_name)
            if not dataset_info or not dataset_info.get('id'):
                ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden  Sync abgebrochen")
                await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
                return False
            dataset_id = dataset_info['id']
            ctx.logger.info(f" ✅ Dataset erstellt: {dataset_id}")
            # Persist the new dataset id on the Akte right away.
            await espocrm.update_entity('CAkten', akte_id, {'aiCollectionId': dataset_id})
        else:
            ctx.logger.error(
                f"❌ aiAktivierungsstatus='{ai_aktivierungsstatus}' aber keine aiCollectionId  "
                f"RAGflow Sync abgebrochen. Bitte Dataset-ID in EspoCRM eintragen."
            )
            await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
            return False

    ctx.logger.info(f" Dataset-ID : {dataset_id}")
    ctx.logger.info(f" EspoCRM docs: {len(docs)}")

    # ── Fetch current RAGflow contents (source of truth) ──────────────────
    # Lookup: espocrm_id → ragflow_doc (only docs tagged with an espocrm_id)
    ragflow_by_espocrm_id: Dict[str, Any] = {}
    try:
        ragflow_docs = await ragflow.list_documents(dataset_id)
        ctx.logger.info(f" RAGflow docs: {len(ragflow_docs)}")
        for rd in ragflow_docs:
            eid = rd.get('espocrm_id')
            if eid:
                ragflow_by_espocrm_id[eid] = rd
    except Exception as e:
        ctx.logger.error(f"❌ RAGflow Dokumentenliste nicht abrufbar: {e}")
        await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
        return False

    # ── Orphan cleanup: RAGflow docs whose EspoCRM counterpart is gone ────
    espocrm_ids_set = {d['id'] for d in docs}
    for rd in ragflow_docs:
        eid = rd.get('espocrm_id')
        if eid and eid not in espocrm_ids_set:
            try:
                await ragflow.remove_document(dataset_id, rd['id'])
                ctx.logger.info(f" 🗑️ Orphan gelöscht: {rd.get('name', rd['id'])} (espocrm_id={eid})")
            except Exception as e:
                ctx.logger.warn(f" ⚠️ Orphan-Delete fehlgeschlagen: {e}")

    synced = 0
    skipped = 0
    failed = 0
    for doc in docs:
        doc_id = doc['id']
        doc_name = doc.get('name', doc_id)
        blake3_hash = doc.get('blake3hash') or ''

        # What is currently stored in RAGflow for this document?
        ragflow_doc = ragflow_by_espocrm_id.get(doc_id)
        ragflow_doc_id = ragflow_doc['id'] if ragflow_doc else None
        ragflow_blake3 = ragflow_doc.get('blake3_hash', '') if ragflow_doc else ''
        ragflow_meta = ragflow_doc.get('meta_fields', {}) if ragflow_doc else {}

        # Current metadata from EspoCRM
        current_description = str(doc.get('beschreibung') or '')
        current_advo_art = str(doc.get('advowareArt') or '')
        current_advo_bemerk = str(doc.get('advowareBemerkung') or '')

        content_changed = blake3_hash != ragflow_blake3
        meta_changed = (
            ragflow_meta.get('description', '') != current_description or
            ragflow_meta.get('advoware_art', '') != current_advo_art or
            ragflow_meta.get('advoware_bemerkung', '') != current_advo_bemerk
        )
        ctx.logger.info(f" 📄 {doc_name}")
        ctx.logger.info(
            f" in_ragflow={bool(ragflow_doc_id)}, "
            f"content_changed={content_changed}, meta_changed={meta_changed}"
        )
        if ragflow_doc_id:
            ctx.logger.info(
                f" ragflow_blake3={ragflow_blake3[:12] if ragflow_blake3 else 'N/A'}..., "
                f"espo_blake3={blake3_hash[:12] if blake3_hash else 'N/A'}..."
            )

        if not ragflow_doc_id and not content_changed and not meta_changed and not blake3_hash:
            # No attachment hash available and never uploaded to RAGflow → skip
            ctx.logger.info(f" ⏭️ Kein Blake3-Hash  übersprungen")
            skipped += 1
            continue

        attachment_id = doc.get('dokumentId')
        if not attachment_id:
            ctx.logger.warn(f" ⚠️ Kein Attachment (dokumentId fehlt)  unsupported")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'unsupported',
                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            })
            skipped += 1
            continue

        filename = unquote(doc.get('dokumentName') or doc.get('name') or 'document.bin')
        mime_type, _ = mimetypes.guess_type(filename)
        if not mime_type:
            mime_type = 'application/octet-stream'

        try:
            if ragflow_doc_id and not content_changed and meta_changed:
                # ── Metadata-only update ──────────────────────────────────
                ctx.logger.info(f" 🔄 Metadata-Update für {ragflow_doc_id}")
                await ragflow.update_document_meta(
                    dataset_id, ragflow_doc_id,
                    blake3_hash=blake3_hash,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                new_ragflow_id = ragflow_doc_id
            elif ragflow_doc_id and not content_changed and not meta_changed:
                # ── Completely unchanged → skip ───────────────────────────
                ctx.logger.info(f" ✅ Unverändert  kein Re-Upload")
                # Keep the tracking fields in EspoCRM up to date
                await espocrm.update_entity('CDokumente', doc_id, {
                    'aiFileId': ragflow_doc_id,
                    'aiCollectionId': dataset_id,
                    'aiSyncHash': blake3_hash,
                    'aiSyncStatus': 'synced',
                })
                skipped += 1
                continue
            else:
                # ── Upload (new document or content changed) ──────────────
                if ragflow_doc_id and content_changed:
                    ctx.logger.info(f" 🗑️ Inhalt geändert  altes Dokument löschen: {ragflow_doc_id}")
                    try:
                        await ragflow.remove_document(dataset_id, ragflow_doc_id)
                    except Exception as remove_error:
                        # Still best effort, but no longer silent: a failed
                        # removal leaves the old copy next to the new upload.
                        ctx.logger.warn(f" ⚠️ Altes Dokument {ragflow_doc_id} konnte nicht gelöscht werden: {remove_error}")
                ctx.logger.info(f" 📥 Downloading {filename} ({attachment_id})…")
                file_content = await espocrm.download_attachment(attachment_id)
                ctx.logger.info(f" Downloaded {len(file_content)} bytes")
                ctx.logger.info(f" 📤 Uploading '{filename}' ({mime_type})…")
                result = await ragflow.upload_document(
                    dataset_id=dataset_id,
                    file_content=file_content,
                    filename=filename,
                    mime_type=mime_type,
                    blake3_hash=blake3_hash,
                    espocrm_id=doc_id,
                    description=current_description,
                    advoware_art=current_advo_art,
                    advoware_bemerkung=current_advo_bemerk,
                )
                if not result or not result.get('id'):
                    raise RuntimeError("upload_document gab kein Ergebnis zurück")
                new_ragflow_id = result['id']
                ctx.logger.info(f" ✅ RAGflow-ID: {new_ragflow_id}")

            # Reached by the metadata-update and upload branches only
            # (the unchanged branch `continue`s above).
            now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiFileId': new_ragflow_id,
                'aiCollectionId': dataset_id,
                'aiSyncHash': blake3_hash,
                'aiSyncStatus': 'synced',
                'aiLastSync': now_str,
            })
            synced += 1
        except Exception as e:
            ctx.logger.error(f" ❌ Fehlgeschlagen: {e}")
            await espocrm.update_entity('CDokumente', doc_id, {
                'aiSyncStatus': 'failed',
                'aiLastSync': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            })
            failed += 1

    ctx.logger.info(f" ✅ Synced : {synced}")
    ctx.logger.info(f" ⏭️ Skipped : {skipped}")
    ctx.logger.info(f" ❌ Failed : {failed}")
    return True