feat: Implement Advoware Document Sync Handler

- Added advoware_document_sync_step.py to handle 3-way merge sync for documents.
- Introduced locking mechanism for per-Akte synchronization to allow parallel processing.
- Integrated data fetching from EspoCRM, Windows files, and Advoware history.
- Implemented 3-way merge logic for document synchronization and metadata updates.
- Triggered document preview generation for new/changed documents.

feat: Create Shared Steps Module

- Added shared/__init__.py for shared steps across multiple modules.
- Introduced generate_document_preview_step.py for generating document previews.
- Implemented logic to download documents, generate previews, and upload to EspoCRM.

feat: Add VMH Document xAI Sync Handler

- Created document_xai_sync_step.py to manage document synchronization with xAI collections.
- Handled create, update, and delete actions for documents in EspoCRM.
- Integrated logic for triggering preview generation and managing xAI collections.
- Implemented error handling and logging for synchronization processes.
2026-03-26 01:00:49 +00:00
parent d78a4ee67e
commit 86ec4db9db
6 changed files with 279 additions and 106 deletions
--- a/src/steps/advoware_docs/advoware_document_sync_step.py
+++ b/src/steps/advoware_docs/advoware_document_sync_step.py
@@ -3,6 +3,7 @@ Advoware Document Sync - Event Handler

 Executes 3-way merge sync for one Akte.
 PER-AKTE LOCK: Allows parallel syncs of different Akten.
+Triggers preview generation for new/changed documents.

 Flow:
 1. Acquire per-Akte lock (key: advoware_document_sync:akte:{aktennr})
@@ -17,6 +18,9 @@ Flow:

 PARALLEL EXECUTION: Multiple Akten can sync simultaneously.
 LOCK SCOPE: Only prevents the same Akte from syncing twice at once.
+
+Enqueues:
+- document.generate_preview: On CREATE/UPDATE_ESPO
 """

 from typing import Dict, Any
@@ -29,7 +33,7 @@ config = {
    "description": "Execute 3-way merge sync for Akte",
    "flows": ["advoware-document-sync"],
    "triggers": [queue("advoware.document.sync")],
-    "enqueues": [],
+    "enqueues": ["document.generate_preview"],
 }


@@ -154,45 +158,78 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
        windows_files = sync_utils.cleanup_file_list(windows_files, advo_history)
        ctx.logger.info(f"🧹 After cleanup: {len(windows_files)} Windows files with History")
        
-        # 5. Build file mapping for 3-way merge
-        # Create lookup dicts by full path (History uses full path, Windows also has full path)
-        espo_docs_by_name = {doc.get('name', '').lower(): doc for doc in espo_docs}
-        windows_files_by_path = {f.get('path', '').lower(): f for f in windows_files}
-        history_by_path = {}
+        # 5. Build file mapping for 3-way merge based on HNR (stable identifier)
+        # hnr (History Number) is the stable identifier in Advoware - files can change name/path, but the hnr stays the same
        
+        # Index EspoCRM docs by hnr (stable identifier)
+        espo_docs_by_hnr = {}
+        espo_docs_by_path = {}  # Fallback for docs without hnr
+        for doc in espo_docs:
+            hnr = doc.get('hnr')
+            if hnr:
+                espo_docs_by_hnr[hnr] = doc
+            dateipfad = doc.get('dateipfad', '')
+            if dateipfad:
+                espo_docs_by_path[dateipfad.lower()] = doc
+        
+        # Index History by hnr
+        history_by_hnr = {}
+        history_by_path = {}  # For path-based lookup
        for entry in advo_history:
+            hnr = entry.get('hNr')
            datei = entry.get('datei', '')
+            if hnr:
+                history_by_hnr[hnr] = entry
            if datei:
                history_by_path[datei.lower()] = entry
        
-        # Get all unique file paths (Windows files already filtered by cleanup)
-        all_paths = set(windows_files_by_path.keys())
+        # Index Windows files by path (they don't have hnr directly)
+        windows_files_by_path = {f.get('path', '').lower(): f for f in windows_files}
        
-        ctx.logger.info(f"📋 Total unique files: {len(all_paths)}")
+        # Get all unique hnrs to process
+        all_hnrs = set(espo_docs_by_hnr.keys()) | set(history_by_hnr.keys())
        
-        # 6. 3-Way merge per file
+        ctx.logger.info(f"📋 Total unique documents (by hnr): {len(all_hnrs)}")
+        ctx.logger.info(f"   EspoCRM docs with hnr: {len(espo_docs_by_hnr)}")
+        ctx.logger.info(f"   History entries: {len(history_by_hnr)}")
+        ctx.logger.info(f"   Windows files: {len(windows_files_by_path)}")
+        
+        # 6. 3-Way merge per hnr (stable identifier)
        sync_results = {
            'created': 0,
            'uploaded': 0,
            'updated': 0,
+            'deleted': 0,
            'skipped': 0,
            'errors': 0
        }
        
-        for file_path in all_paths:
-            # Extract filename for display and EspoCRM lookup
-            filename = file_path.split('\\')[-1]
+        for hnr in all_hnrs:
+            # Get data for this hnr from all sources
+            espo_doc = espo_docs_by_hnr.get(hnr)
+            history_entry = history_by_hnr.get(hnr)
+            
+            # Get Windows file through history path
+            windows_file = None
+            file_path = None
+            if history_entry:
+                file_path = history_entry.get('datei', '').lower()
+                windows_file = windows_files_by_path.get(file_path)
+            
+            # Extract filename for display
+            if history_entry and history_entry.get('datei'):
+                filename = history_entry.get('datei').split('\\')[-1]
+            elif espo_doc:
+                filename = espo_doc.get('name', f'hnr_{hnr}')
+            else:
+                filename = f'hnr_{hnr}'
            
            ctx.logger.info(f"\n{'='*80}")
-            ctx.logger.info(f"Processing: {filename}")
+            ctx.logger.info(f"Processing: {filename} (hnr: {hnr})")
            ctx.logger.info(f"{'='*80}")
            
-            espo_doc = espo_docs_by_name.get(filename.lower())
-            windows_file = windows_files_by_path.get(file_path)
-            history_entry = history_by_path.get(file_path)
-            
            try:
-                # Perform 3-way merge
+                # Perform 3-way merge based on hnr
                action = sync_utils.merge_three_way(espo_doc, windows_file, history_entry)
                
                ctx.logger.info(f"📊 Merge decision:")
@@ -207,6 +244,11 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
                
                elif action.action == 'CREATE':
                    # Download from Windows and create in EspoCRM
+                    if not windows_file:
+                        ctx.logger.error(f"❌ Cannot CREATE - no Windows file for hnr {hnr}")
+                        sync_results['errors'] += 1
+                        continue
+                        
                    ctx.logger.info(f"📥 Downloading {filename} from Windows...")
                    content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
                    
@@ -271,15 +313,36 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
                    )
                    
                    sync_results['created'] += 1
+                    
+                    # Trigger preview generation
+                    try:
+                        await ctx.emit('document.generate_preview', {
+                            'entity_id': doc_id,
+                            'entity_type': 'CDokumente'
+                        })
+                        ctx.logger.info(f"✅ Preview generation triggered for {doc_id}")
+                    except Exception as e:
+                        ctx.logger.warn(f"⚠️  Failed to trigger preview generation: {e}")
                
                elif action.action == 'UPDATE_ESPO':
                    # Download from Windows and update EspoCRM
+                    if not windows_file:
+                        ctx.logger.error(f"❌ Cannot UPDATE_ESPO - no Windows file for hnr {hnr}")
+                        sync_results['errors'] += 1
+                        continue
+                        
                    ctx.logger.info(f"📥 Downloading {filename} from Windows...")
                    content = await watcher.download_file(aktennummer, windows_file.get('relative_path', filename))
                    
                    # Compute Blake3 hash
                    blake3_hash = compute_blake3(content)
                    
+                    # Determine MIME type
+                    import mimetypes
+                    mime_type, _ = mimetypes.guess_type(filename)
+                    if not mime_type:
+                        mime_type = 'application/octet-stream'
+                    
                    # Extract full Windows path
                    full_path = windows_file.get('path', '')
                    
@@ -288,10 +351,11 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
                    now_iso = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    
                    update_data = {
+                        'name': filename,  # Update name if changed
                        'blake3hash': blake3_hash,
                        'syncedHash': blake3_hash,
                        'usn': windows_file.get('usn', 0),
-                        'dateipfad': full_path,
+                        'dateipfad': full_path,  # Update path if changed
                        'syncStatus': 'synced',
                        'lastSyncTimestamp': now_iso
                    }
@@ -306,6 +370,16 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
                    
                    ctx.logger.info(f"✅ Updated document: {espo_doc.get('id')}")
                    sync_results['updated'] += 1
+                    
+                    # Trigger preview generation
+                    try:
+                        await ctx.emit('document.generate_preview', {
+                            'entity_id': espo_doc.get('id'),
+                            'entity_type': 'CDokumente'
+                        })
+                        ctx.logger.info(f"✅ Preview generation triggered for {espo_doc.get('id')}")
+                    except Exception as e:
+                        ctx.logger.warn(f"⚠️  Failed to trigger preview generation: {e}")
                
                elif action.action == 'UPLOAD_WINDOWS':
                    # Upload to Windows from EspoCRM
@@ -316,6 +390,19 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
                    ctx.logger.warn(f"⚠️  Upload to Windows not yet implemented for {filename}")
                    sync_results['skipped'] += 1
                
+                elif action.action == 'DELETE':
+                    # Delete from EspoCRM (file deleted in Windows/Advoware)
+                    ctx.logger.info(f"🗑️  Deleting {filename} from EspoCRM...")
+                    
+                    if espo_doc:
+                        doc_id = espo_doc.get('id')
+                        await espocrm.delete_entity('CDokumente', doc_id)
+                        ctx.logger.info(f"✅ Deleted document: {doc_id}")
+                        sync_results['deleted'] += 1
+                    else:
+                        ctx.logger.warn(f"⚠️  No EspoCRM document found for deletion")
+                        sync_results['skipped'] += 1
+                
            except Exception as e:
                ctx.logger.error(f"❌ Error processing {filename}: {e}")
                sync_results['errors'] += 1
@@ -376,6 +463,7 @@ async def handler(event_data: Dict[str, Any], ctx: FlowContext) -> None:
        ctx.logger.info(f"📊 Results:")
        ctx.logger.info(f"   - Created: {sync_results['created']}")
        ctx.logger.info(f"   - Updated: {sync_results['updated']}")
+        ctx.logger.info(f"   - Deleted: {sync_results['deleted']}")
        ctx.logger.info(f"   - Uploaded: {sync_results['uploaded']}")
        ctx.logger.info(f"   - Skipped: {sync_results['skipped']}")
        ctx.logger.info(f"   - Errors: {sync_results['errors']}")
--- a/src/steps/advoware_docs/document_sync_cron_step.py
+++ b/src/steps/advoware_docs/document_sync_cron_step.py
@@ -149,9 +149,10 @@ async def handler(input_data: None, ctx: FlowContext) -> None:
            ctx.logger.info(f"   ├─ Aktivierungsstatus RAW: '{aktivierungsstatus}' (type: {type(aktivierungsstatus).__name__})")
            ctx.logger.info(f"   └─ All akte fields: {list(akte.keys())[:10]}...")  # Debug: Zeige Feldnamen
            
-            # Valid statuses: import, neu, aktiv (case-insensitive)
-            # EspoCRM liefert kleingeschriebene Werte!
-            valid_statuses = ['import', 'neu', 'aktiv']
+            # Valid statuses: Both German and English variants accepted
+            # German: import, neu, aktiv
+            # English: import, new, active
+            valid_statuses = ['import', 'neu', 'aktiv', 'new', 'active']
            aktivierungsstatus_lower = str(aktivierungsstatus).lower().strip()
            
            ctx.logger.info(f"🔍 Status validation:")