feat: Implement Advoware Document Sync Handler

- Added advoware_document_sync_step.py to handle 3-way merge sync for documents.
- Introduced a per-Akte lock so that different Akten can sync in parallel while the same Akte is never synced twice at once.
- Integrated data fetching from EspoCRM, Windows files, and Advoware history.
- Implemented 3-way merge logic for document synchronization and metadata updates.
- Triggered document preview generation for new/changed documents.

feat: Create Shared Steps Module

- Added shared/__init__.py for shared steps across multiple modules.
- Introduced generate_document_preview_step.py for generating document previews.
- Implemented logic to download documents, generate previews, and upload to EspoCRM.

feat: Add VMH Document xAI Sync Handler

- Created document_xai_sync_step.py to manage document synchronization with xAI collections.
- Handled create, update, and delete actions for documents in EspoCRM.
- Integrated logic for triggering preview generation and managing xAI collections.
- Implemented error handling and logging for synchronization processes.
This commit is contained in:
bsiggel
2026-03-26 01:00:49 +00:00
parent d78a4ee67e
commit 86ec4db9db
6 changed files with 279 additions and 106 deletions

View File

@@ -3,6 +3,7 @@ Advoware Document Sync - Event Handler
Executes 3-way merge sync for one Akte.
PER-AKTE LOCK: Allows parallel syncs of different Akten.
Triggers preview generation for new/changed documents.
Flow:
1. Acquire per-Akte lock (key: advoware_document_sync:akte:{aktennr})
@@ -17,6 +18,9 @@ Flow:
PARALLEL EXECUTION: Multiple Akten can sync simultaneously.
LOCK SCOPE: Only prevents the same Akte from syncing twice at once.
Enqueues:
- document.generate_preview: on CREATE/UPDATE_ESPO
"""
from typing import Dict, Any
@@ -29,7 +33,7 @@ config = {
"description": "Execute 3-way merge sync for Akte",
"flows": ["advoware-document-sync"],
"triggers": [queue("advoware.document.sync")],
"enqueues": [],
"enqueues": ["document.generate_preview"],
}
@@ -154,45 +158,78 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
windows_files = sync_utils.cleanup_file_list(windows_files, advo_history)
ctx.logger.info(f"🧹 After cleanup: {len(windows_files)} Windows files with History")
# 5. Build file mapping for 3-way merge
# Create lookup dicts by full path (History uses full path, Windows also has full path)
espo_docs_by_name = {doc.get('name', '').lower(): doc for doc in espo_docs}
windows_files_by_path = {f.get('path', '').lower(): f for f in windows_files}
history_by_path = {}
# 5. Build file mapping for 3-way merge based on HNR (stable identifier)
# hnr (History Number) is the stable identifier in Advoware: a file's name or path can change, but its hnr stays the same
# Index EspoCRM docs by hnr (stable identifier)
espo_docs_by_hnr = {}
espo_docs_by_path = {} # Fallback for docs without hnr
for doc in espo_docs:
hnr = doc.get('hnr')
if hnr:
espo_docs_by_hnr[hnr] = doc
dateipfad = doc.get('dateipfad', '')
if dateipfad:
espo_docs_by_path[dateipfad.lower()] = doc
# Index History by hnr
history_by_hnr = {}
history_by_path = {} # For path-based lookup
for entry in advo_history:
hnr = entry.get('hNr')
datei = entry.get('datei', '')
if hnr:
history_by_hnr[hnr] = entry
if datei:
history_by_path[datei.lower()] = entry
# Get all unique file paths (Windows files already filtered by cleanup)
all_paths = set(windows_files_by_path.keys())
# Index Windows files by path (they don't have hnr directly)
windows_files_by_path = {f.get('path', '').lower(): f for f in windows_files}
ctx.logger.info(f"📋 Total unique files: {len(all_paths)}")
# Get all unique hnrs to process
all_hnrs = set(espo_docs_by_hnr.keys()) | set(history_by_hnr.keys())
# 6. 3-Way merge per file
ctx.logger.info(f"📋 Total unique documents (by hnr): {len(all_hnrs)}")
ctx.logger.info(f" EspoCRM docs with hnr: {len(espo_docs_by_hnr)}")
ctx.logger.info(f" History entries: {len(history_by_hnr)}")
ctx.logger.info(f" Windows files: {len(windows_files_by_path)}")
# 6. 3-Way merge per hnr (stable identifier)
sync_results = {
'created': 0,
'uploaded': 0,
'updated': 0,
'deleted': 0,
'skipped': 0,
'errors': 0
}
for file_path in all_paths:
# Extract filename for display and EspoCRM lookup
filename = file_path.split('\\')[-1]
for hnr in all_hnrs:
# Get data for this hnr from all sources
espo_doc = espo_docs_by_hnr.get(hnr)
history_entry = history_by_hnr.get(hnr)
# Get Windows file through history path
windows_file = None
file_path = None
if history_entry:
file_path = history_entry.get('datei', '').lower()
windows_file = windows_files_by_path.get(file_path)
# Extract filename for display
if history_entry and history_entry.get('datei'):
filename = history_entry.get('datei').split('\\')[-1]
elif espo_doc:
filename = espo_doc.get('name', f'hnr_{hnr}')
else:
filename = f'hnr_{hnr}'
ctx.logger.info(f"\n{'='*80}")
ctx.logger.info(f"Processing: {filename}")
ctx.logger.info(f"Processing: {filename} (hnr: {hnr})")
ctx.logger.info(f"{'='*80}")
espo_doc = espo_docs_by_name.get(filename.lower())
windows_file = windows_files_by_path.get(file_path)
history_entry = history_by_path.get(file_path)
try:
# Perform 3-way merge
# Perform 3-way merge based on hnr
action = sync_utils.merge_three_way(espo_doc, windows_file, history_entry)
ctx.logger.info(f"📊 Merge decision:")
@@ -207,6 +244,11 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
elif action.action == 'CREATE':
# Download from Windows and create in EspoCRM
if not windows_file:
ctx.logger.error(f"❌ Cannot CREATE - no Windows file for hnr {hnr}")
sync_results['errors'] += 1
continue
ctx.logger.info(f"📥 Downloading {filename} from Windows...")
content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
@@ -271,15 +313,36 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
)
sync_results['created'] += 1
# Trigger preview generation
try:
await ctx.emit('document.generate_preview', {
'entity_id': doc_id,
'entity_type': 'CDokumente'
})
ctx.logger.info(f"✅ Preview generation triggered for {doc_id}")
except Exception as e:
ctx.logger.warn(f"⚠️ Failed to trigger preview generation: {e}")
elif action.action == 'UPDATE_ESPO':
# Download from Windows and update EspoCRM
if not windows_file:
ctx.logger.error(f"❌ Cannot UPDATE_ESPO - no Windows file for hnr {hnr}")
sync_results['errors'] += 1
continue
ctx.logger.info(f"📥 Downloading {filename} from Windows...")
content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
# Compute Blake3 hash
blake3_hash = compute_blake3(content)
# Determine MIME type
import mimetypes
mime_type, _ = mimetypes.guess_type(filename)
if not mime_type:
mime_type = 'application/octet-stream'
# Extract full Windows path
full_path = windows_file.get('path', '')
@@ -288,10 +351,11 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
now_iso = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
update_data = {
'name': filename, # Update name if changed
'blake3hash': blake3_hash,
'syncedHash': blake3_hash,
'usn': windows_file.get('usn', 0),
'dateipfad': full_path,
'dateipfad': full_path, # Update path if changed
'syncStatus': 'synced',
'lastSyncTimestamp': now_iso
}
@@ -306,6 +370,16 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
ctx.logger.info(f"✅ Updated document: {espo_doc.get('id')}")
sync_results['updated'] += 1
# Trigger preview generation
try:
await ctx.emit('document.generate_preview', {
'entity_id': espo_doc.get('id'),
'entity_type': 'CDokumente'
})
ctx.logger.info(f"✅ Preview generation triggered for {espo_doc.get('id')}")
except Exception as e:
ctx.logger.warn(f"⚠️ Failed to trigger preview generation: {e}")
elif action.action == 'UPLOAD_WINDOWS':
# Upload to Windows from EspoCRM
@@ -316,6 +390,19 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
ctx.logger.warn(f"⚠️ Upload to Windows not yet implemented for {filename}")
sync_results['skipped'] += 1
elif action.action == 'DELETE':
# Delete from EspoCRM (file deleted in Windows/Advoware)
ctx.logger.info(f"🗑️ Deleting {filename} from EspoCRM...")
if espo_doc:
doc_id = espo_doc.get('id')
await espocrm.delete_entity('CDokumente', doc_id)
ctx.logger.info(f"✅ Deleted document: {doc_id}")
sync_results['deleted'] += 1
else:
ctx.logger.warn(f"⚠️ No EspoCRM document found for deletion")
sync_results['skipped'] += 1
except Exception as e:
ctx.logger.error(f"❌ Error processing {filename}: {e}")
sync_results['errors'] += 1
@@ -376,6 +463,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
ctx.logger.info(f"📊 Results:")
ctx.logger.info(f" - Created: {sync_results['created']}")
ctx.logger.info(f" - Updated: {sync_results['updated']}")
ctx.logger.info(f" - Deleted: {sync_results['deleted']}")
ctx.logger.info(f" - Uploaded: {sync_results['uploaded']}")
ctx.logger.info(f" - Skipped: {sync_results['skipped']}")
ctx.logger.info(f" - Errors: {sync_results['errors']}")

View File

@@ -149,9 +149,10 @@ async def handler(input_data: None, ctx: FlowContext) -> None:
ctx.logger.info(f" ├─ Aktivierungsstatus RAW: '{aktivierungsstatus}' (type: {type(aktivierungsstatus).__name__})")
ctx.logger.info(f" └─ All akte fields: {list(akte.keys())[:10]}...") # Debug: Zeige Feldnamen
# Valid statuses: import, neu, aktiv (case-insensitive)
# EspoCRM returns lowercase values!
valid_statuses = ['import', 'neu', 'aktiv']
# Valid statuses: Both German and English variants accepted
# German: import, neu, aktiv
# English: import, new, active
valid_statuses = ['import', 'neu', 'aktiv', 'new', 'active']
aktivierungsstatus_lower = str(aktivierungsstatus).lower().strip()
ctx.logger.info(f"🔍 Status validation:")