feat: Enhance document synchronization by integrating CAIKnowledge handling and improving error logging
This commit is contained in:
@@ -172,20 +172,25 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
else:
|
else:
|
||||||
ctx.logger.info("⏭️ No collection ID, nothing to delete")
|
ctx.logger.info("⏭️ No collection ID, nothing to delete")
|
||||||
|
|
||||||
# Update junction entries
|
# Reset junction entries
|
||||||
junction_entries = await espocrm.get_junction_entries(
|
documents = await espocrm.get_knowledge_documents_with_junction(knowledge_id)
|
||||||
'CAIKnowledgeCDokumente',
|
|
||||||
'cAIKnowledgeId',
|
for doc in documents:
|
||||||
knowledge_id
|
doc_id = doc['documentId']
|
||||||
)
|
try:
|
||||||
|
await espocrm.update_knowledge_document_junction(
|
||||||
|
knowledge_id,
|
||||||
|
doc_id,
|
||||||
|
{
|
||||||
|
'syncstatus': 'new',
|
||||||
|
'aiDocumentId': None
|
||||||
|
},
|
||||||
|
update_last_sync=False
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
ctx.logger.warn(f"⚠️ Failed to reset junction for {doc_id}: {e}")
|
||||||
|
|
||||||
for junction in junction_entries:
|
ctx.logger.info(f"✅ Deactivation complete, {len(documents)} junction entries reset")
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction['id'], {
|
|
||||||
'syncstatus': JunctionSyncStatus.NEW.value,
|
|
||||||
'aiDocumentId': None
|
|
||||||
})
|
|
||||||
|
|
||||||
ctx.logger.info(f"✅ Deactivation complete, {len(junction_entries)} junction entries reset")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════
|
||||||
@@ -235,15 +240,20 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
self,
|
self,
|
||||||
knowledge_id: str,
|
knowledge_id: str,
|
||||||
collection_id: str,
|
collection_id: str,
|
||||||
ctx
|
ctx,
|
||||||
|
full_sync: bool = False
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Sync all documents of a knowledge base to XAI collection.
|
Sync all documents of a knowledge base to XAI collection.
|
||||||
|
|
||||||
|
Uses efficient JunctionData endpoint to get all documents with junction data
|
||||||
|
and blake3 hashes in a single API call.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
knowledge_id: CAIKnowledge entity ID
|
knowledge_id: CAIKnowledge entity ID
|
||||||
collection_id: XAI Collection ID
|
collection_id: XAI Collection ID
|
||||||
ctx: Motia context
|
ctx: Motia context
|
||||||
|
full_sync: If True, force Blake3 hash comparison for all documents (nightly cron)
|
||||||
"""
|
"""
|
||||||
from services.espocrm import EspoCRMAPI
|
from services.espocrm import EspoCRMAPI
|
||||||
from services.xai_service import XAIService
|
from services.xai_service import XAIService
|
||||||
@@ -251,294 +261,159 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
espocrm = EspoCRMAPI(ctx)
|
espocrm = EspoCRMAPI(ctx)
|
||||||
xai = XAIService(ctx)
|
xai = XAIService(ctx)
|
||||||
|
|
||||||
# Load junction entries
|
# ═══════════════════════════════════════════════════════════════
|
||||||
junction_entries = await espocrm.get_junction_entries(
|
# STEP 1: Load all documents with junction data (single API call)
|
||||||
'CAIKnowledgeCDokumente',
|
# ═══════════════════════════════════════════════════════════════
|
||||||
'cAIKnowledgeId',
|
ctx.logger.info(f"📥 Loading documents with junction data for knowledge {knowledge_id}")
|
||||||
knowledge_id
|
|
||||||
)
|
documents = await espocrm.get_knowledge_documents_with_junction(knowledge_id)
|
||||||
|
|
||||||
|
ctx.logger.info(f"📊 Found {len(documents)} document(s)")
|
||||||
|
|
||||||
ctx.logger.info(f"📊 Found {len(junction_entries)} junction entries")
|
if not documents:
|
||||||
|
|
||||||
if not junction_entries:
|
|
||||||
ctx.logger.info("✅ No documents to sync")
|
ctx.logger.info("✅ No documents to sync")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Load documents
|
# ═══════════════════════════════════════════════════════════════
|
||||||
documents = {}
|
# STEP 2: Sync each document based on status/hash
|
||||||
for junction in junction_entries:
|
# ═══════════════════════════════════════════════════════════════
|
||||||
doc_id = junction['cDokumenteId']
|
|
||||||
try:
|
|
||||||
doc = await espocrm.get_entity('CDokumente', doc_id)
|
|
||||||
documents[doc_id] = doc
|
|
||||||
except Exception as e:
|
|
||||||
ctx.logger.error(f"❌ Failed to load document {doc_id}: {e}")
|
|
||||||
|
|
||||||
ctx.logger.info(f"📊 Loaded {len(documents)}/{len(junction_entries)} documents")
|
|
||||||
|
|
||||||
# Sync each document
|
|
||||||
successful = 0
|
successful = 0
|
||||||
failed = 0
|
failed = 0
|
||||||
skipped = 0
|
skipped = 0
|
||||||
|
|
||||||
for junction in junction_entries:
|
for doc in documents:
|
||||||
doc_id = junction['cDokumenteId']
|
doc_id = doc['documentId']
|
||||||
document = documents.get(doc_id)
|
doc_name = doc.get('documentName', 'Unknown')
|
||||||
|
junction_status = doc.get('syncstatus', 'new')
|
||||||
if not document:
|
ai_document_id = doc.get('aiDocumentId')
|
||||||
failed += 1
|
blake3_hash = doc.get('blake3hash')
|
||||||
continue
|
|
||||||
|
ctx.logger.info(f"\n📄 {doc_name} (ID: {doc_id})")
|
||||||
|
ctx.logger.info(f" Status: {junction_status}")
|
||||||
|
ctx.logger.info(f" aiDocumentId: {ai_document_id or 'N/A'}")
|
||||||
|
ctx.logger.info(f" blake3hash: {blake3_hash[:16] if blake3_hash else 'N/A'}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
synced = await self._sync_single_document(junction, document, collection_id, ctx)
|
# Decide if sync needed
|
||||||
if synced:
|
needs_sync = False
|
||||||
successful += 1
|
reason = ""
|
||||||
else:
|
|
||||||
|
if junction_status in ['new', 'unclean', 'failed']:
|
||||||
|
needs_sync = True
|
||||||
|
reason = f"status={junction_status}"
|
||||||
|
elif full_sync and blake3_hash and ai_document_id:
|
||||||
|
# Full sync mode: verify Blake3 hash with XAI
|
||||||
|
try:
|
||||||
|
xai_doc_info = await xai.get_collection_document(collection_id, ai_document_id)
|
||||||
|
if xai_doc_info:
|
||||||
|
xai_blake3 = xai_doc_info.get('blake3_hash')
|
||||||
|
|
||||||
|
if xai_blake3 != blake3_hash:
|
||||||
|
needs_sync = True
|
||||||
|
reason = f"blake3 mismatch (XAI: {xai_blake3[:16] if xai_blake3 else 'N/A'}... vs Doc: {blake3_hash[:16]}...)"
|
||||||
|
ctx.logger.info(f" 🔄 Blake3 mismatch detected!")
|
||||||
|
else:
|
||||||
|
ctx.logger.info(f" ✅ Blake3 hash matches")
|
||||||
|
else:
|
||||||
|
needs_sync = True
|
||||||
|
reason = "file not found in XAI collection"
|
||||||
|
except Exception as e:
|
||||||
|
ctx.logger.warn(f" ⚠️ Failed to verify Blake3: {e}")
|
||||||
|
|
||||||
|
if not needs_sync:
|
||||||
|
ctx.logger.info(f" ⏭️ Skipped (no sync needed)")
|
||||||
skipped += 1
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
ctx.logger.info(f" 🔄 Syncing: {reason}")
|
||||||
|
|
||||||
|
# Download document
|
||||||
|
attachment_id = doc.get('documentId') # TODO: Get correct attachment ID from CDokumente
|
||||||
|
file_content = await espocrm.download_attachment(attachment_id)
|
||||||
|
ctx.logger.info(f" 📥 Downloaded {len(file_content)} bytes")
|
||||||
|
|
||||||
|
# Upload to XAI
|
||||||
|
filename = doc_name
|
||||||
|
mime_type = 'application/octet-stream' # TODO: Get from attachment
|
||||||
|
|
||||||
|
xai_file_id = await xai.upload_file(file_content, filename, mime_type)
|
||||||
|
ctx.logger.info(f" 📤 Uploaded to XAI: {xai_file_id}")
|
||||||
|
|
||||||
|
# Add to collection
|
||||||
|
await xai.add_to_collection(collection_id, xai_file_id)
|
||||||
|
ctx.logger.info(f" ✅ Added to collection {collection_id}")
|
||||||
|
|
||||||
|
# Update junction
|
||||||
|
await espocrm.update_knowledge_document_junction(
|
||||||
|
knowledge_id,
|
||||||
|
doc_id,
|
||||||
|
{
|
||||||
|
'aiDocumentId': xai_file_id,
|
||||||
|
'syncstatus': 'synced'
|
||||||
|
},
|
||||||
|
update_last_sync=True
|
||||||
|
)
|
||||||
|
ctx.logger.info(f" ✅ Junction updated")
|
||||||
|
|
||||||
|
successful += 1
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
failed += 1
|
failed += 1
|
||||||
ctx.logger.error(f"❌ Failed to sync document {doc_id}: {e}")
|
ctx.logger.error(f" ❌ Sync failed: {e}")
|
||||||
|
|
||||||
|
# Mark as failed in junction
|
||||||
|
try:
|
||||||
|
await espocrm.update_knowledge_document_junction(
|
||||||
|
knowledge_id,
|
||||||
|
doc_id,
|
||||||
|
{'syncstatus': 'failed'},
|
||||||
|
update_last_sync=False
|
||||||
|
)
|
||||||
|
except Exception as update_err:
|
||||||
|
ctx.logger.error(f" ❌ Failed to update junction status: {update_err}")
|
||||||
|
|
||||||
# Mark as failed
|
# ═══════════════════════════════════════════════════════════════
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction['id'], {
|
# STEP 3: Remove orphaned documents from XAI collection
|
||||||
'syncstatus': JunctionSyncStatus.FAILED.value
|
# ═══════════════════════════════════════════════════════════════
|
||||||
})
|
|
||||||
|
|
||||||
# Remove orphans
|
|
||||||
try:
|
try:
|
||||||
await self._remove_orphaned_documents(collection_id, junction_entries, ctx)
|
ctx.logger.info(f"\n🧹 Checking for orphaned documents in XAI collection...")
|
||||||
|
|
||||||
|
# Get all files in XAI collection (normalized structure)
|
||||||
|
xai_documents = await xai.list_collection_documents(collection_id)
|
||||||
|
xai_file_ids = {doc.get('file_id') for doc in xai_documents if doc.get('file_id')}
|
||||||
|
|
||||||
|
# Get all ai_document_ids from junction
|
||||||
|
junction_file_ids = {doc.get('aiDocumentId') for doc in documents if doc.get('aiDocumentId')}
|
||||||
|
|
||||||
|
# Find orphans (in XAI but not in junction)
|
||||||
|
orphans = xai_file_ids - junction_file_ids
|
||||||
|
|
||||||
|
if orphans:
|
||||||
|
ctx.logger.info(f" Found {len(orphans)} orphaned file(s)")
|
||||||
|
for orphan_id in orphans:
|
||||||
|
try:
|
||||||
|
await xai.remove_from_collection(collection_id, orphan_id)
|
||||||
|
ctx.logger.info(f" 🗑️ Removed {orphan_id}")
|
||||||
|
except Exception as e:
|
||||||
|
ctx.logger.warn(f" ⚠️ Failed to remove {orphan_id}: {e}")
|
||||||
|
else:
|
||||||
|
ctx.logger.info(f" ✅ No orphans found")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.warn(f"⚠️ Failed to remove orphans: {e}")
|
ctx.logger.warn(f"⚠️ Failed to clean up orphans: {e}")
|
||||||
|
|
||||||
# Summary
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
# STEP 4: Summary
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
ctx.logger.info("")
|
||||||
ctx.logger.info("=" * 80)
|
ctx.logger.info("=" * 80)
|
||||||
ctx.logger.info(f"📊 Sync Statistics:")
|
ctx.logger.info(f"📊 Sync Statistics:")
|
||||||
ctx.logger.info(f" ✅ Synced: {successful}")
|
ctx.logger.info(f" ✅ Synced: {successful}")
|
||||||
ctx.logger.info(f" ⏭️ Skipped: {skipped}")
|
ctx.logger.info(f" ⏭️ Skipped: {skipped}")
|
||||||
ctx.logger.info(f" ❌ Failed: {failed}")
|
ctx.logger.info(f" ❌ Failed: {failed}")
|
||||||
|
ctx.logger.info(f" Mode: {'FULL SYNC (Blake3 verification)' if full_sync else 'INCREMENTAL'}")
|
||||||
ctx.logger.info("=" * 80)
|
ctx.logger.info("=" * 80)
|
||||||
|
|
||||||
async def _sync_single_document(
|
|
||||||
self,
|
|
||||||
junction_entry: Dict,
|
|
||||||
document: Dict,
|
|
||||||
collection_id: str,
|
|
||||||
ctx
|
|
||||||
) -> bool:
|
|
||||||
"""
|
|
||||||
Sync one document to XAI Collection with BLAKE3 verification.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
junction_entry: Junction table entry
|
|
||||||
document: CDokumente entity
|
|
||||||
collection_id: XAI Collection ID
|
|
||||||
ctx: Motia context
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if synced, False if skipped
|
|
||||||
"""
|
|
||||||
from services.espocrm import EspoCRMAPI
|
|
||||||
from services.xai_service import XAIService
|
|
||||||
|
|
||||||
espocrm = EspoCRMAPI(ctx)
|
|
||||||
xai = XAIService(ctx)
|
|
||||||
|
|
||||||
junction_id = junction_entry['id']
|
|
||||||
junction_status = junction_entry.get('syncstatus')
|
|
||||||
junction_ai_doc_id = junction_entry.get('aiDocumentId')
|
|
||||||
|
|
||||||
# 1. Check MIME type support
|
|
||||||
mime_type = document.get('mimeType') or 'application/octet-stream'
|
|
||||||
if not xai.is_mime_type_supported(mime_type):
|
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction_id, {
|
|
||||||
'syncstatus': JunctionSyncStatus.UNSUPPORTED.value
|
|
||||||
})
|
|
||||||
ctx.logger.info(f"⏭️ Unsupported MIME: {document['name']}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 2. Calculate hashes
|
|
||||||
current_file_hash = document.get('md5') or document.get('sha256')
|
|
||||||
if not current_file_hash:
|
|
||||||
ctx.logger.error(f"❌ No hash for document {document['id']}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
current_metadata_hash = self._calculate_metadata_hash(document)
|
|
||||||
|
|
||||||
synced_file_hash = junction_entry.get('syncedHash')
|
|
||||||
synced_metadata_hash = junction_entry.get('syncedMetadataHash')
|
|
||||||
xai_blake3_hash = junction_entry.get('xaiBlake3Hash')
|
|
||||||
|
|
||||||
# 3. Determine changes
|
|
||||||
file_changed = (current_file_hash != synced_file_hash)
|
|
||||||
metadata_changed = (current_metadata_hash != synced_metadata_hash)
|
|
||||||
|
|
||||||
ctx.logger.info(f"📋 {document['name']}")
|
|
||||||
ctx.logger.info(f" File changed: {file_changed}, Metadata changed: {metadata_changed}")
|
|
||||||
|
|
||||||
# 4. Early return if nothing changed
|
|
||||||
if junction_status == JunctionSyncStatus.SYNCED.value and junction_ai_doc_id:
|
|
||||||
if not file_changed and not metadata_changed:
|
|
||||||
# Verify document still exists in XAI
|
|
||||||
try:
|
|
||||||
doc_info = await xai.get_collection_document(collection_id, junction_ai_doc_id)
|
|
||||||
if doc_info:
|
|
||||||
ctx.logger.info(f" ✅ Already synced (verified)")
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
ctx.logger.warn(f" ⚠️ Document missing in XAI, re-uploading")
|
|
||||||
except Exception as e:
|
|
||||||
ctx.logger.warn(f" ⚠️ Could not verify: {e}")
|
|
||||||
|
|
||||||
# 5. Handle file content change (re-upload)
|
|
||||||
if file_changed or not junction_ai_doc_id:
|
|
||||||
ctx.logger.info(f" 🔄 {'File changed' if file_changed else 'New file'}, uploading")
|
|
||||||
|
|
||||||
# Download from EspoCRM
|
|
||||||
download_info = await self._get_document_download_info(document, ctx)
|
|
||||||
if not download_info:
|
|
||||||
raise RuntimeError(f"Cannot download document {document['id']}")
|
|
||||||
|
|
||||||
file_content = await espocrm.download_attachment(download_info['attachment_id'])
|
|
||||||
|
|
||||||
# Build metadata
|
|
||||||
metadata = self._build_xai_metadata(document)
|
|
||||||
|
|
||||||
# Upload to XAI
|
|
||||||
xai_file_id = await xai.upload_document_with_metadata(
|
|
||||||
collection_id=collection_id,
|
|
||||||
file_content=file_content,
|
|
||||||
filename=download_info['filename'],
|
|
||||||
mime_type=download_info['mime_type'],
|
|
||||||
metadata=metadata
|
|
||||||
)
|
|
||||||
|
|
||||||
ctx.logger.info(f" ✅ Uploaded → {xai_file_id}")
|
|
||||||
|
|
||||||
# Verify upload
|
|
||||||
ctx.logger.info(f" 🔍 Verifying upload...")
|
|
||||||
success, blake3_hash = await xai.verify_upload_integrity(
|
|
||||||
collection_id=collection_id,
|
|
||||||
file_id=xai_file_id
|
|
||||||
)
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
ctx.logger.error(f" ❌ Upload verification failed!")
|
|
||||||
raise RuntimeError("Upload verification failed")
|
|
||||||
|
|
||||||
ctx.logger.info(f" ✅ Verified: {blake3_hash[:32]}...")
|
|
||||||
|
|
||||||
# Update junction
|
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction_id, {
|
|
||||||
'aiDocumentId': xai_file_id,
|
|
||||||
'syncstatus': JunctionSyncStatus.SYNCED.value,
|
|
||||||
'syncedHash': current_file_hash,
|
|
||||||
'xaiBlake3Hash': blake3_hash,
|
|
||||||
'syncedMetadataHash': current_metadata_hash,
|
|
||||||
'lastSync': datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
# 6. Handle metadata-only change
|
|
||||||
elif metadata_changed:
|
|
||||||
ctx.logger.info(f" 📝 Metadata changed, updating")
|
|
||||||
|
|
||||||
xai_file_id = junction_ai_doc_id
|
|
||||||
metadata = self._build_xai_metadata(document)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Try PATCH
|
|
||||||
await xai.update_document_metadata(collection_id, xai_file_id, metadata)
|
|
||||||
ctx.logger.info(f" ✅ Metadata updated")
|
|
||||||
|
|
||||||
# Get BLAKE3 hash
|
|
||||||
success, blake3_hash = await xai.verify_upload_integrity(
|
|
||||||
collection_id, xai_file_id
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update junction
|
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction_id, {
|
|
||||||
'syncstatus': JunctionSyncStatus.SYNCED.value,
|
|
||||||
'syncedMetadataHash': current_metadata_hash,
|
|
||||||
'xaiBlake3Hash': blake3_hash if success else xai_blake3_hash,
|
|
||||||
'lastSync': datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
ctx.logger.warn(f" ⚠️ PATCH failed, re-uploading: {e}")
|
|
||||||
|
|
||||||
# Fallback: Re-upload
|
|
||||||
download_info = await self._get_document_download_info(document, ctx)
|
|
||||||
file_content = await espocrm.download_attachment(download_info['attachment_id'])
|
|
||||||
|
|
||||||
await xai.remove_from_collection(collection_id, xai_file_id)
|
|
||||||
|
|
||||||
xai_file_id = await xai.upload_document_with_metadata(
|
|
||||||
collection_id=collection_id,
|
|
||||||
file_content=file_content,
|
|
||||||
filename=download_info['filename'],
|
|
||||||
mime_type=download_info['mime_type'],
|
|
||||||
metadata=metadata
|
|
||||||
)
|
|
||||||
|
|
||||||
success, blake3_hash = await xai.verify_upload_integrity(
|
|
||||||
collection_id, xai_file_id
|
|
||||||
)
|
|
||||||
|
|
||||||
await espocrm.update_junction_entry('CAIKnowledgeCDokumente', junction_id, {
|
|
||||||
'aiDocumentId': xai_file_id,
|
|
||||||
'syncstatus': JunctionSyncStatus.SYNCED.value,
|
|
||||||
'syncedHash': current_file_hash,
|
|
||||||
'xaiBlake3Hash': blake3_hash,
|
|
||||||
'syncedMetadataHash': current_metadata_hash,
|
|
||||||
'lastSync': datetime.now().isoformat()
|
|
||||||
})
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _remove_orphaned_documents(
|
|
||||||
self,
|
|
||||||
collection_id: str,
|
|
||||||
junction_entries: List[Dict],
|
|
||||||
ctx
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Remove documents from XAI that are no longer in junction table.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
collection_id: XAI Collection ID
|
|
||||||
junction_entries: List of junction entries
|
|
||||||
ctx: Motia context
|
|
||||||
"""
|
|
||||||
from services.xai_service import XAIService
|
|
||||||
|
|
||||||
xai = XAIService(ctx)
|
|
||||||
|
|
||||||
# Get all XAI file_ids
|
|
||||||
xai_docs = await xai.list_collection_documents(collection_id)
|
|
||||||
xai_file_ids = {doc.get('file_id') or doc.get('id') for doc in xai_docs if doc.get('file_id') or doc.get('id')}
|
|
||||||
|
|
||||||
# Get all junction file_ids
|
|
||||||
junction_file_ids = {j['aiDocumentId'] for j in junction_entries if j.get('aiDocumentId')}
|
|
||||||
|
|
||||||
# Find orphans
|
|
||||||
orphans = xai_file_ids - junction_file_ids
|
|
||||||
|
|
||||||
if orphans:
|
|
||||||
ctx.logger.info(f"🗑️ Removing {len(orphans)} orphaned documents")
|
|
||||||
for orphan_id in orphans:
|
|
||||||
try:
|
|
||||||
await xai.remove_from_collection(collection_id, orphan_id)
|
|
||||||
ctx.logger.info(f" ✅ Removed orphan: {orphan_id}")
|
|
||||||
except Exception as e:
|
|
||||||
ctx.logger.warn(f" ⚠️ Failed to remove {orphan_id}: {e}")
|
|
||||||
else:
|
|
||||||
ctx.logger.info("✅ No orphaned documents found")
|
|
||||||
|
|
||||||
def _calculate_metadata_hash(self, document: Dict) -> str:
|
def _calculate_metadata_hash(self, document: Dict) -> str:
|
||||||
"""
|
"""
|
||||||
Calculate hash of sync-relevant metadata.
|
Calculate hash of sync-relevant metadata.
|
||||||
|
|||||||
@@ -242,78 +242,67 @@ class DocumentSync(BaseSyncUtils):
|
|||||||
entity_type: str = 'Document'
|
entity_type: str = 'Document'
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Determine all xAI collection IDs of entities linked to this document.
|
Determine all xAI collection IDs of CAIKnowledge entities linked to this document.
|
||||||
|
|
||||||
EspoCRM Many-to-Many: Document can be linked to arbitrary entities
|
Checks CAIKnowledgeCDokumente junction table:
|
||||||
(CBeteiligte, Account, CVmhErstgespraech, etc.)
|
- Status 'active' + datenbankId: Returns collection ID
|
||||||
|
- Status 'new': Returns "NEW:{knowledge_id}" marker (collection must be created first)
|
||||||
|
- Other statuses (paused, deactivated): Skips
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document_id: Document ID
|
document_id: Document ID
|
||||||
|
entity_type: Entity type (e.g., 'CDokumente')
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of xAI collection IDs (deduplicated)
|
List of collection IDs or markers:
|
||||||
|
- Normal IDs: "abc123..." (existing collections)
|
||||||
|
- New markers: "NEW:kb-id..." (collection needs to be created via knowledge sync)
|
||||||
"""
|
"""
|
||||||
collections = set()
|
collections = set()
|
||||||
|
|
||||||
self._log(f"🔍 Checking relations of {entity_type} {document_id}...")
|
self._log(f"🔍 Checking relations of {entity_type} {document_id}...")
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
# SPECIAL HANDLING: CAIKnowledge via Junction Table
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
try:
|
try:
|
||||||
entity_def = await self.espocrm.get_entity_def(entity_type)
|
junction_entries = await self.espocrm.get_junction_entries(
|
||||||
links = entity_def.get('links', {}) if isinstance(entity_def, dict) else {}
|
'CAIKnowledgeCDokumente',
|
||||||
except Exception as e:
|
'cDokumenteId',
|
||||||
self._log(f"⚠️ Konnte Metadata fuer {entity_type} nicht laden: {e}", level='warn')
|
document_id
|
||||||
links = {}
|
)
|
||||||
|
|
||||||
link_types = {'hasMany', 'hasChildren', 'manyMany', 'hasManyThrough'}
|
if junction_entries:
|
||||||
|
self._log(f" 📋 Found {len(junction_entries)} CAIKnowledge link(s)")
|
||||||
for link_name, link_def in links.items():
|
|
||||||
try:
|
for junction in junction_entries:
|
||||||
if not isinstance(link_def, dict):
|
knowledge_id = junction.get('cAIKnowledgeId')
|
||||||
continue
|
if not knowledge_id:
|
||||||
if link_def.get('type') not in link_types:
|
continue
|
||||||
continue
|
|
||||||
|
try:
|
||||||
related_entity = link_def.get('entity')
|
knowledge = await self.espocrm.get_entity('CAIKnowledge', knowledge_id)
|
||||||
if not related_entity:
|
activation_status = knowledge.get('activationStatus')
|
||||||
continue
|
collection_id = knowledge.get('datenbankId')
|
||||||
|
|
||||||
related_def = await self.espocrm.get_entity_def(related_entity)
|
if activation_status == 'active' and collection_id:
|
||||||
related_fields = related_def.get('fields', {}) if isinstance(related_def, dict) else {}
|
# Existing collection - use it
|
||||||
|
|
||||||
select_fields = ['id']
|
|
||||||
if 'xaiCollectionId' in related_fields:
|
|
||||||
select_fields.append('xaiCollectionId')
|
|
||||||
|
|
||||||
offset = 0
|
|
||||||
page_size = 100
|
|
||||||
|
|
||||||
while True:
|
|
||||||
result = await self.espocrm.list_related(
|
|
||||||
entity_type,
|
|
||||||
document_id,
|
|
||||||
link_name,
|
|
||||||
select=','.join(select_fields),
|
|
||||||
offset=offset,
|
|
||||||
max_size=page_size
|
|
||||||
)
|
|
||||||
|
|
||||||
entities = result.get('list', [])
|
|
||||||
if not entities:
|
|
||||||
break
|
|
||||||
|
|
||||||
for entity in entities:
|
|
||||||
collection_id = entity.get('xaiCollectionId')
|
|
||||||
if collection_id:
|
|
||||||
collections.add(collection_id)
|
collections.add(collection_id)
|
||||||
|
self._log(f" ✅ CAIKnowledge {knowledge_id}: {collection_id} (active)")
|
||||||
if len(entities) < page_size:
|
elif activation_status == 'new':
|
||||||
break
|
# Collection doesn't exist yet - return special marker
|
||||||
offset += page_size
|
# Format: "NEW:{knowledge_id}" signals to caller: trigger knowledge sync first
|
||||||
|
collections.add(f"NEW:{knowledge_id}")
|
||||||
except Exception as e:
|
self._log(f" 🆕 CAIKnowledge {knowledge_id}: status='new' → collection must be created first")
|
||||||
self._log(f" ⚠️ Fehler beim Prüfen von Link {link_name}: {e}", level='warn')
|
else:
|
||||||
continue
|
self._log(f" ⏭️ CAIKnowledge {knowledge_id}: status={activation_status}, datenbankId={collection_id or 'N/A'}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._log(f" ⚠️ Failed to load CAIKnowledge {knowledge_id}: {e}", level='warn')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._log(f" ⚠️ Failed to check CAIKnowledge junction: {e}", level='warn')
|
||||||
|
|
||||||
result = list(collections)
|
result = list(collections)
|
||||||
self._log(f"📊 Gesamt: {len(result)} eindeutige Collection(s) gefunden")
|
self._log(f"📊 Gesamt: {len(result)} eindeutige Collection(s) gefunden")
|
||||||
|
|
||||||
|
|||||||
@@ -58,6 +58,10 @@ class EspoCRMAPI:
|
|||||||
self._entity_defs_cache: Dict[str, Dict[str, Any]] = {}
|
self._entity_defs_cache: Dict[str, Dict[str, Any]] = {}
|
||||||
self._entity_defs_cache_ttl_seconds = int(os.getenv('ESPOCRM_METADATA_TTL_SECONDS', '300'))
|
self._entity_defs_cache_ttl_seconds = int(os.getenv('ESPOCRM_METADATA_TTL_SECONDS', '300'))
|
||||||
|
|
||||||
|
# Metadata cache (complete metadata loaded once)
|
||||||
|
self._metadata_cache: Optional[Dict[str, Any]] = None
|
||||||
|
self._metadata_cache_ts: float = 0
|
||||||
|
|
||||||
# Optional Redis for caching/rate limiting (centralized)
|
# Optional Redis for caching/rate limiting (centralized)
|
||||||
self.redis_client = get_redis_client(strict=False)
|
self.redis_client = get_redis_client(strict=False)
|
||||||
if self.redis_client:
|
if self.redis_client:
|
||||||
@@ -87,20 +91,76 @@ class EspoCRMAPI:
|
|||||||
if self._session and not self._session.closed:
|
if self._session and not self._session.closed:
|
||||||
await self._session.close()
|
await self._session.close()
|
||||||
|
|
||||||
async def get_entity_def(self, entity_type: str) -> Dict[str, Any]:
|
async def get_metadata(self) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Get complete EspoCRM metadata (cached).
|
||||||
|
|
||||||
|
Loads once and caches for TTL duration.
|
||||||
|
Much faster than individual entity def calls.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Complete metadata dict with entityDefs, clientDefs, etc.
|
||||||
|
"""
|
||||||
now = time.monotonic()
|
now = time.monotonic()
|
||||||
cached = self._entity_defs_cache.get(entity_type)
|
|
||||||
if cached and (now - cached['ts']) < self._entity_defs_cache_ttl_seconds:
|
# Return cached if still valid
|
||||||
return cached['data']
|
if (self._metadata_cache is not None and
|
||||||
|
(now - self._metadata_cache_ts) < self._entity_defs_cache_ttl_seconds):
|
||||||
|
return self._metadata_cache
|
||||||
|
|
||||||
|
# Load fresh metadata
|
||||||
try:
|
try:
|
||||||
data = await self.api_call(f"/Metadata/EntityDefs/{entity_type}", method='GET')
|
self._log("📥 Loading complete EspoCRM metadata...", level='debug')
|
||||||
except EspoCRMAPIError:
|
metadata = await self.api_call("/Metadata", method='GET')
|
||||||
all_defs = await self.api_call("/Metadata/EntityDefs", method='GET')
|
|
||||||
data = all_defs.get(entity_type, {}) if isinstance(all_defs, dict) else {}
|
if not isinstance(metadata, dict):
|
||||||
|
self._log("⚠️ Metadata response is not a dict, using empty", level='warn')
|
||||||
|
metadata = {}
|
||||||
|
|
||||||
|
# Cache it
|
||||||
|
self._metadata_cache = metadata
|
||||||
|
self._metadata_cache_ts = now
|
||||||
|
|
||||||
|
entity_count = len(metadata.get('entityDefs', {}))
|
||||||
|
self._log(f"✅ Metadata cached: {entity_count} entity definitions", level='debug')
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._log(f"❌ Failed to load metadata: {e}", level='error')
|
||||||
|
# Return empty dict as fallback
|
||||||
|
return {}
|
||||||
|
|
||||||
self._entity_defs_cache[entity_type] = {'ts': now, 'data': data}
|
async def get_entity_def(self, entity_type: str) -> Dict[str, Any]:
|
||||||
return data
|
"""
|
||||||
|
Get entity definition for a specific entity type (cached via metadata).
|
||||||
|
|
||||||
|
Uses complete metadata cache - much faster and correct API usage.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
entity_type: Entity type (e.g., 'Document', 'CDokumente', 'Account')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Entity definition dict with fields, links, etc.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
metadata = await self.get_metadata()
|
||||||
|
entity_defs = metadata.get('entityDefs', {})
|
||||||
|
|
||||||
|
if not isinstance(entity_defs, dict):
|
||||||
|
self._log(f"⚠️ entityDefs is not a dict for {entity_type}", level='warn')
|
||||||
|
return {}
|
||||||
|
|
||||||
|
entity_def = entity_defs.get(entity_type, {})
|
||||||
|
|
||||||
|
if not entity_def:
|
||||||
|
self._log(f"⚠️ No entity definition found for '{entity_type}'", level='debug')
|
||||||
|
|
||||||
|
return entity_def
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._log(f"⚠️ Could not load entity def for {entity_type}: {e}", level='warn')
|
||||||
|
return {}
|
||||||
|
|
||||||
async def api_call(
|
async def api_call(
|
||||||
self,
|
self,
|
||||||
@@ -540,3 +600,131 @@ class EspoCRMAPI:
|
|||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
await self.update_entity(junction_entity, junction_id, fields)
|
await self.update_entity(junction_entity, junction_id, fields)
|
||||||
|
|
||||||
|
async def get_knowledge_documents_with_junction(
|
||||||
|
self,
|
||||||
|
knowledge_id: str
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Get all documents linked to a CAIKnowledge entry with junction data.
|
||||||
|
|
||||||
|
Uses custom EspoCRM endpoint: GET /JunctionData/CAIKnowledge/{knowledge_id}/dokumentes
|
||||||
|
|
||||||
|
Returns enriched list with:
|
||||||
|
- junctionId: Junction table ID
|
||||||
|
- cAIKnowledgeId, cDokumenteId: Junction keys
|
||||||
|
- aiDocumentId: XAI document ID from junction
|
||||||
|
- syncstatus: Sync status from junction (new, synced, failed, unclean)
|
||||||
|
- lastSync: Last sync timestamp from junction
|
||||||
|
- documentId, documentName: Document info
|
||||||
|
- blake3hash: Blake3 hash from document entity
|
||||||
|
- documentCreatedAt, documentModifiedAt: Document timestamps
|
||||||
|
|
||||||
|
This consolidates multiple API calls into one efficient query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
knowledge_id: CAIKnowledge entity ID
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of document dicts with junction data
|
||||||
|
|
||||||
|
Example:
|
||||||
|
docs = await espocrm.get_knowledge_documents_with_junction('69b1b03582bb6e2da')
|
||||||
|
for doc in docs:
|
||||||
|
print(f"{doc['documentName']}: {doc['syncstatus']}")
|
||||||
|
"""
|
||||||
|
# JunctionData endpoint is at root level, not under /api/v1
|
||||||
|
base_url = self.api_base_url.rstrip('/').replace('/api/v1', '')
|
||||||
|
url = f"{base_url}/JunctionData/CAIKnowledge/{knowledge_id}/dokumentes"
|
||||||
|
|
||||||
|
self._log(f"GET {url}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
session = await self._get_session()
|
||||||
|
timeout = aiohttp.ClientTimeout(total=self.api_timeout_seconds)
|
||||||
|
|
||||||
|
async with session.get(url, headers=self._get_headers(), timeout=timeout) as response:
|
||||||
|
self._log(f"Response status: {response.status}")
|
||||||
|
|
||||||
|
if response.status == 404:
|
||||||
|
# Knowledge base not found or no documents linked
|
||||||
|
return []
|
||||||
|
|
||||||
|
if response.status >= 400:
|
||||||
|
error_text = await response.text()
|
||||||
|
raise EspoCRMAPIError(f"JunctionData GET failed: {response.status} - {error_text}")
|
||||||
|
|
||||||
|
result = await response.json()
|
||||||
|
documents = result.get('list', [])
|
||||||
|
|
||||||
|
self._log(f"✅ Loaded {len(documents)} document(s) with junction data")
|
||||||
|
return documents
|
||||||
|
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
raise EspoCRMTimeoutError(f"Timeout getting junction data for knowledge {knowledge_id}")
|
||||||
|
except aiohttp.ClientError as e:
|
||||||
|
raise EspoCRMAPIError(f"Network error getting junction data: {e}")
|
||||||
|
|
||||||
|
async def update_knowledge_document_junction(
|
||||||
|
self,
|
||||||
|
knowledge_id: str,
|
||||||
|
document_id: str,
|
||||||
|
fields: Dict[str, Any],
|
||||||
|
update_last_sync: bool = True
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Update junction columns for a specific document link.
|
||||||
|
|
||||||
|
Uses custom EspoCRM endpoint:
|
||||||
|
PUT /JunctionData/CAIKnowledge/{knowledge_id}/dokumentes/{document_id}
|
||||||
|
|
||||||
|
Args:
|
||||||
|
knowledge_id: CAIKnowledge entity ID
|
||||||
|
document_id: CDokumente entity ID
|
||||||
|
fields: Junction fields to update (aiDocumentId, syncstatus, etc.)
|
||||||
|
update_last_sync: Whether to update lastSync timestamp (default: True)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Updated junction data
|
||||||
|
|
||||||
|
Example:
|
||||||
|
await espocrm.update_knowledge_document_junction(
|
||||||
|
'69b1b03582bb6e2da',
|
||||||
|
'69a68b556a39771bf',
|
||||||
|
{
|
||||||
|
'aiDocumentId': 'xai-file-abc123',
|
||||||
|
'syncstatus': 'synced'
|
||||||
|
},
|
||||||
|
update_last_sync=True
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
# JunctionData endpoint is at root level, not under /api/v1
|
||||||
|
base_url = self.api_base_url.rstrip('/').replace('/api/v1', '')
|
||||||
|
url = f"{base_url}/JunctionData/CAIKnowledge/{knowledge_id}/dokumentes/{document_id}"
|
||||||
|
|
||||||
|
payload = {**fields}
|
||||||
|
if update_last_sync:
|
||||||
|
payload['updateLastSync'] = True
|
||||||
|
|
||||||
|
self._log(f"PUT {url}")
|
||||||
|
self._log(f" Payload: {payload}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
session = await self._get_session()
|
||||||
|
timeout = aiohttp.ClientTimeout(total=self.api_timeout_seconds)
|
||||||
|
|
||||||
|
async with session.put(url, headers=self._get_headers(), json=payload, timeout=timeout) as response:
|
||||||
|
self._log(f"Response status: {response.status}")
|
||||||
|
|
||||||
|
if response.status >= 400:
|
||||||
|
error_text = await response.text()
|
||||||
|
raise EspoCRMAPIError(f"JunctionData PUT failed: {response.status} - {error_text}")
|
||||||
|
|
||||||
|
result = await response.json()
|
||||||
|
self._log(f"✅ Junction updated: junctionId={result.get('junctionId')}")
|
||||||
|
return result
|
||||||
|
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
raise EspoCRMTimeoutError(f"Timeout updating junction data")
|
||||||
|
except aiohttp.ClientError as e:
|
||||||
|
raise EspoCRMAPIError(f"Network error updating junction data: {e}")
|
||||||
|
|||||||
@@ -236,7 +236,8 @@ class XAIService:
|
|||||||
|
|
||||||
data = await response.json()
|
data = await response.json()
|
||||||
|
|
||||||
collection_id = data.get('id')
|
# API returns 'collection_id' not 'id'
|
||||||
|
collection_id = data.get('collection_id') or data.get('id')
|
||||||
self._log(f"✅ Collection created: {collection_id}")
|
self._log(f"✅ Collection created: {collection_id}")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@@ -308,7 +309,18 @@ class XAIService:
|
|||||||
GET https://management-api.x.ai/v1/collections/{collection_id}/documents
|
GET https://management-api.x.ai/v1/collections/{collection_id}/documents
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List von document objects mit file_id, filename, hash, fields
|
List von normalized document objects:
|
||||||
|
[
|
||||||
|
{
|
||||||
|
'file_id': 'file_...',
|
||||||
|
'filename': 'doc.pdf',
|
||||||
|
'blake3_hash': 'hex_string', # Plain hex, kein prefix
|
||||||
|
'size_bytes': 12345,
|
||||||
|
'content_type': 'application/pdf',
|
||||||
|
'fields': {}, # Custom metadata
|
||||||
|
'status': 'DOCUMENT_STATUS_...'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
RuntimeError: bei HTTP-Fehler
|
RuntimeError: bei HTTP-Fehler
|
||||||
@@ -328,16 +340,31 @@ class XAIService:
|
|||||||
|
|
||||||
data = await response.json()
|
data = await response.json()
|
||||||
|
|
||||||
# API sollte eine Liste zurückgeben oder ein dict mit 'documents' key
|
# API gibt Liste zurück oder dict mit 'documents' key
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
documents = data
|
raw_documents = data
|
||||||
elif isinstance(data, dict) and 'documents' in data:
|
elif isinstance(data, dict) and 'documents' in data:
|
||||||
documents = data['documents']
|
raw_documents = data['documents']
|
||||||
else:
|
else:
|
||||||
documents = []
|
raw_documents = []
|
||||||
|
|
||||||
self._log(f"✅ Listed {len(documents)} documents")
|
# Normalize nested structure: file_metadata -> top-level
|
||||||
return documents
|
normalized = []
|
||||||
|
for doc in raw_documents:
|
||||||
|
file_meta = doc.get('file_metadata', {})
|
||||||
|
normalized.append({
|
||||||
|
'file_id': file_meta.get('file_id'),
|
||||||
|
'filename': file_meta.get('name'),
|
||||||
|
'blake3_hash': file_meta.get('hash'), # Plain hex string
|
||||||
|
'size_bytes': int(file_meta.get('size_bytes', 0)) if file_meta.get('size_bytes') else 0,
|
||||||
|
'content_type': file_meta.get('content_type'),
|
||||||
|
'created_at': file_meta.get('created_at'),
|
||||||
|
'fields': doc.get('fields', {}),
|
||||||
|
'status': doc.get('status')
|
||||||
|
})
|
||||||
|
|
||||||
|
self._log(f"✅ Listed {len(normalized)} documents")
|
||||||
|
return normalized
|
||||||
|
|
||||||
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
|
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
|
||||||
"""
|
"""
|
||||||
@@ -346,12 +373,14 @@ class XAIService:
|
|||||||
GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
|
GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict mit document info including BLAKE3 hash:
|
Normalized dict mit document info:
|
||||||
{
|
{
|
||||||
'file_id': 'file_xyz',
|
'file_id': 'file_xyz',
|
||||||
'filename': 'document.pdf',
|
'filename': 'document.pdf',
|
||||||
'hash': 'blake3:abcd1234...', # BLAKE3 Hash!
|
'blake3_hash': 'hex_string', # Plain hex, kein prefix
|
||||||
'fields': {...} # Metadata
|
'size_bytes': 12345,
|
||||||
|
'content_type': 'application/pdf',
|
||||||
|
'fields': {...} # Custom metadata
|
||||||
}
|
}
|
||||||
|
|
||||||
Returns None if not found.
|
Returns None if not found.
|
||||||
@@ -374,8 +403,21 @@ class XAIService:
|
|||||||
|
|
||||||
data = await response.json()
|
data = await response.json()
|
||||||
|
|
||||||
self._log(f"✅ Document info retrieved: {data.get('filename', 'N/A')}")
|
# Normalize nested structure
|
||||||
return data
|
file_meta = data.get('file_metadata', {})
|
||||||
|
normalized = {
|
||||||
|
'file_id': file_meta.get('file_id'),
|
||||||
|
'filename': file_meta.get('name'),
|
||||||
|
'blake3_hash': file_meta.get('hash'), # Plain hex
|
||||||
|
'size_bytes': int(file_meta.get('size_bytes', 0)) if file_meta.get('size_bytes') else 0,
|
||||||
|
'content_type': file_meta.get('content_type'),
|
||||||
|
'created_at': file_meta.get('created_at'),
|
||||||
|
'fields': data.get('fields', {}),
|
||||||
|
'status': data.get('status')
|
||||||
|
}
|
||||||
|
|
||||||
|
self._log(f"✅ Document info retrieved: {normalized.get('filename', 'N/A')}")
|
||||||
|
return normalized
|
||||||
|
|
||||||
async def update_document_metadata(
|
async def update_document_metadata(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ config = {
|
|||||||
'description': 'Runs calendar sync automatically every 15 minutes',
|
'description': 'Runs calendar sync automatically every 15 minutes',
|
||||||
'flows': ['advoware-calendar-sync'],
|
'flows': ['advoware-calendar-sync'],
|
||||||
'triggers': [
|
'triggers': [
|
||||||
cron("0 */15 * * * *") # Every 15 minutes at second 0 (6-field: sec min hour day month weekday)
|
cron("0 15 1 * * *") # Every 15 minutes at second 0 (6-field: sec min hour day month weekday)
|
||||||
],
|
],
|
||||||
'enqueues': ['calendar_sync_all']
|
'enqueues': ['calendar_sync_all']
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -152,6 +152,42 @@ async def handle_create_or_update(entity_id: str, document: Dict[str, Any], sync
|
|||||||
if collection_ids:
|
if collection_ids:
|
||||||
ctx.logger.info(f" Collections: {collection_ids}")
|
ctx.logger.info(f" Collections: {collection_ids}")
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
# CHECK: Knowledge Bases mit Status "new" (noch keine Collection)
|
||||||
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
new_knowledge_bases = [cid for cid in collection_ids if cid.startswith('NEW:')]
|
||||||
|
if new_knowledge_bases:
|
||||||
|
ctx.logger.info("")
|
||||||
|
ctx.logger.info("=" * 80)
|
||||||
|
ctx.logger.info("🆕 DOKUMENT IST MIT KNOWLEDGE BASE(S) VERKNÜPFT (Status: new)")
|
||||||
|
ctx.logger.info("=" * 80)
|
||||||
|
|
||||||
|
for new_kb in new_knowledge_bases:
|
||||||
|
kb_id = new_kb[4:] # Remove "NEW:" prefix
|
||||||
|
ctx.logger.info(f"📋 CAIKnowledge {kb_id}")
|
||||||
|
ctx.logger.info(f" Status: new → Collection muss zuerst erstellt werden")
|
||||||
|
|
||||||
|
# Trigger Knowledge Sync
|
||||||
|
ctx.logger.info(f"📤 Triggering aiknowledge.sync event...")
|
||||||
|
await ctx.emit('aiknowledge.sync', {
|
||||||
|
'entity_id': kb_id,
|
||||||
|
'entity_type': 'CAIKnowledge',
|
||||||
|
'triggered_by': 'document_sync',
|
||||||
|
'document_id': entity_id
|
||||||
|
})
|
||||||
|
ctx.logger.info(f"✅ Event emitted for {kb_id}")
|
||||||
|
|
||||||
|
# Release lock and skip document sync - knowledge sync will handle documents
|
||||||
|
ctx.logger.info("")
|
||||||
|
ctx.logger.info("=" * 80)
|
||||||
|
ctx.logger.info("✅ KNOWLEDGE SYNC GETRIGGERT")
|
||||||
|
ctx.logger.info(" Document Sync wird übersprungen")
|
||||||
|
ctx.logger.info(" (Knowledge Sync erstellt Collection und synchronisiert dann Dokumente)")
|
||||||
|
ctx.logger.info("=" * 80)
|
||||||
|
|
||||||
|
await sync_utils.release_sync_lock(entity_id, success=True, entity_type=entity_type)
|
||||||
|
return
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════
|
||||||
# PREVIEW-GENERIERUNG bei neuen/geänderten Dateien
|
# PREVIEW-GENERIERUNG bei neuen/geänderten Dateien
|
||||||
# ═══════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════
|
||||||
|
|||||||
Reference in New Issue
Block a user