feat: Remove the `full_sync` parameter from the AI Knowledge sync and always run Blake3 hash verification for already-synced documents

This commit is contained in:
bsiggel
2026-03-12 22:41:19 +00:00
parent 8f1533337c
commit 46c0bbf381
4 changed files with 20 additions and 122 deletions

View File

@@ -227,7 +227,7 @@ class AIKnowledgeSync(BaseSyncUtils):
})
# Sync documents
await self._sync_knowledge_documents(knowledge_id, collection_id, ctx, full_sync=full_sync)
await self._sync_knowledge_documents(knowledge_id, collection_id, ctx)
else:
ctx.logger.error(f"❌ Unknown activationStatus: {activation_status}")
@@ -240,20 +240,18 @@ class AIKnowledgeSync(BaseSyncUtils):
self,
knowledge_id: str,
collection_id: str,
ctx,
full_sync: bool = False
ctx
) -> None:
"""
Sync all documents of a knowledge base to XAI collection.
Uses efficient JunctionData endpoint to get all documents with junction data
and blake3 hashes in a single API call.
and blake3 hashes in a single API call. Hash comparison is always performed.
Args:
knowledge_id: CAIKnowledge entity ID
collection_id: XAI Collection ID
ctx: Motia context
full_sync: If True, force Blake3 hash comparison for all documents (nightly cron)
"""
from services.espocrm import EspoCRMAPI
from services.xai_service import XAIService
@@ -301,8 +299,8 @@ class AIKnowledgeSync(BaseSyncUtils):
if junction_status in ['new', 'unclean', 'failed']:
needs_sync = True
reason = f"status={junction_status}"
elif full_sync and blake3_hash and ai_document_id:
# Full sync mode: verify Blake3 hash with XAI
elif junction_status == 'synced' and blake3_hash and ai_document_id:
# Verify Blake3 hash with XAI (always, since hash from JunctionData API is free)
try:
xai_doc_info = await xai.get_collection_document(collection_id, ai_document_id)
if xai_doc_info:
@@ -310,7 +308,7 @@ class AIKnowledgeSync(BaseSyncUtils):
if xai_blake3 != blake3_hash:
needs_sync = True
reason = f"blake3 mismatch (XAI: {xai_blake3[:16] if xai_blake3 else 'N/A'}... vs Doc: {blake3_hash[:16]}...)"
reason = f"blake3 mismatch (XAI: {xai_blake3[:16] if xai_blake3 else 'N/A'}... vs EspoCRM: {blake3_hash[:16]}...)"
ctx.logger.info(f" 🔄 Blake3 mismatch detected!")
else:
ctx.logger.info(f" ✅ Blake3 hash matches")