feat: Implement AI Knowledge Sync Utilities and Event Handlers

- Added AIKnowledgeActivationStatus and AIKnowledgeSyncStatus enums to models.py for managing activation and sync states.
- Introduced AIKnowledgeSync class in aiknowledge_sync_utils.py for synchronizing CAIKnowledge entities with XAI Collections, including collection lifecycle management, document synchronization, and metadata updates.
- Created a daily cron job (aiknowledge_full_sync_cron_step.py) to perform a full sync of CAIKnowledge entities.
- Developed an event handler (aiknowledge_sync_event_step.py) to synchronize CAIKnowledge entities with XAI Collections triggered by webhooks and cron jobs.
- Implemented a webhook handler (aiknowledge_update_api_step.py) to receive updates from EspoCRM for CAIKnowledge entities and enqueue sync events.
- Enhanced xai_service.py with methods for collection management, document listing, and metadata updates.
This commit is contained in:
bsiggel
2026-03-11 21:14:52 +00:00
parent a5a122b688
commit 9bbfa61b3b
7 changed files with 1366 additions and 1 deletions

View File

@@ -0,0 +1,90 @@
"""AI Knowledge Full Sync - Daily Cron Job"""
from typing import Any
from motia import FlowContext, cron
# Motia step configuration for the daily full-sync cron step.
# - "triggers": a single cron schedule starts the step.
# - "enqueues": declares the topic this step publishes to; the handler below
#   enqueues one "aiknowledge.sync" event per entity that needs syncing.
# NOTE(review): the flow name here ("aiknowledge-full-sync") differs from the
# "vmh-aiknowledge" flow used by the related sync/webhook steps — confirm this
# is intentional.
config = {
"name": "AI Knowledge Full Sync",
"description": "Daily full sync of all CAIKnowledge entities (catches missed webhooks)",
"flows": ["aiknowledge-full-sync"],
"triggers": [
cron("0 0 2 * * *"), # Daily at 2:00 AM (six-field expression; first field presumably seconds)
],
"enqueues": ["aiknowledge.sync"],
}
async def handler(input_data: None, ctx: FlowContext[Any]) -> None:
    """
    Enqueue a sync event for every CAIKnowledge entity that still needs syncing.

    Queries EspoCRM for CAIKnowledge entities whose ``activationStatus`` is
    active and whose ``syncStatus`` is either unclean or failed, then enqueues
    one ``aiknowledge.sync`` event per entity. Scheduled to run every day at
    02:00:00 (see ``config``).

    Args:
        input_data: Unused; the cron trigger delivers no payload.
        ctx: Motia flow context providing ``logger`` and ``enqueue``.

    Raises:
        Exception: Any error raised while listing entities or enqueueing is
            logged and re-raised unchanged.
    """
    from services.espocrm import EspoCRMAPI
    from services.models import AIKnowledgeActivationStatus, AIKnowledgeSyncStatus

    # Upper bound for the single list request; adjust if more entities can be
    # pending at once.
    max_entities = 1000
    separator = "=" * 80

    ctx.logger.info(separator)
    ctx.logger.info("🌙 DAILY FULL SYNC STARTED")
    ctx.logger.info(separator)

    espocrm = EspoCRMAPI(ctx)
    try:
        # Load all CAIKnowledge entities with status 'active' that need sync
        result = await espocrm.list_entities(
            'CAIKnowledge',
            where=[
                {
                    'type': 'equals',
                    'attribute': 'activationStatus',
                    'value': AIKnowledgeActivationStatus.ACTIVE.value,
                },
                {
                    'type': 'in',
                    'attribute': 'syncStatus',
                    'value': [
                        AIKnowledgeSyncStatus.UNCLEAN.value,
                        AIKnowledgeSyncStatus.FAILED.value,
                    ],
                },
            ],
            select='id,name,syncStatus',
            max_size=max_entities,
        )
        entities = result.get('list', [])
        total = len(entities)
        ctx.logger.info(f"📊 Found {total} knowledge bases needing sync")

        if not entities:
            ctx.logger.info("✅ All knowledge bases are synced")
            ctx.logger.info(separator)
            return

        # A full page means the request limit may have cut off further
        # entities; make that visible instead of silently skipping them.
        if total >= max_entities:
            ctx.logger.warning(
                f"⚠️ Result reached max_size={max_entities}; "
                "additional knowledge bases may not have been returned"
            )

        # Enqueue sync events for all
        for i, entity in enumerate(entities, 1):
            await ctx.enqueue({
                'topic': 'aiknowledge.sync',
                'data': {
                    'knowledge_id': entity['id'],
                    'source': 'daily_full_sync',
                },
            })
            # Use .get() for the display-only fields so a missing name cannot
            # abort the run after the event has already been enqueued.
            ctx.logger.info(
                f"📤 [{i}/{total}] Enqueued: {entity.get('name')} "
                f"(syncStatus={entity.get('syncStatus')})"
            )

        ctx.logger.info(separator)
        ctx.logger.info(f"✅ Full sync complete: {total} events enqueued")
        ctx.logger.info(separator)
    except Exception as e:
        ctx.logger.error(separator)
        ctx.logger.error("❌ FULL SYNC FAILED")
        ctx.logger.error(separator)
        ctx.logger.error(f"Error: {e}", exc_info=True)
        raise

View File

@@ -0,0 +1,89 @@
"""AI Knowledge Sync Event Handler"""
from typing import Dict, Any
from redis import Redis
from motia import FlowContext, queue
# Motia step configuration for the AI Knowledge sync event step.
# The step is started by messages on the "aiknowledge.sync" queue topic, the
# same topic the webhook and daily cron steps declare in their "enqueues".
config = {
"name": "AI Knowledge Sync",
"description": "Synchronizes CAIKnowledge entities with XAI Collections",
"flows": ["vmh-aiknowledge"],
"triggers": [
queue("aiknowledge.sync")
],
}
async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None:
    """
    Synchronize one CAIKnowledge entity with its XAI Collection.

    Consumes ``aiknowledge.sync`` events, which are emitted by the
    CAIKnowledge update webhook and by the daily full sync cron job. The sync
    runs under a per-knowledge lock obtained via ``AIKnowledgeSync``.

    Args:
        event_data: Event payload; must contain ``knowledge_id`` and may
            contain ``source`` (defaults to ``'unknown'`` for logging).
        ctx: Motia flow context providing the logger.

    Raises:
        RuntimeError: If the sync lock for the entity is already held; raised
            so the queue can redeliver the event later.
        Exception: Any error raised by the sync itself is logged, the lock is
            released with ``success=False``, and the original error is
            re-raised.
    """
    from services.config import get_redis_client
    from services.aiknowledge_sync_utils import AIKnowledgeSync

    separator = "=" * 80

    ctx.logger.info(separator)
    ctx.logger.info("🔄 AI KNOWLEDGE SYNC STARTED")
    ctx.logger.info(separator)

    # Extract data
    knowledge_id = event_data.get('knowledge_id')
    source = event_data.get('source', 'unknown')

    if not knowledge_id:
        # A payload without an id can never succeed, so return instead of
        # raising (raising would only trigger pointless retries).
        ctx.logger.error("❌ Missing knowledge_id in event data")
        return

    ctx.logger.info(f"📋 Knowledge ID: {knowledge_id}")
    ctx.logger.info(f"📋 Source: {source}")
    ctx.logger.info(separator)

    # Get Redis for locking
    redis_client: Redis = get_redis_client(strict=False)

    # Initialize sync utils
    sync_utils = AIKnowledgeSync(ctx, redis_client)

    # Acquire lock
    lock_acquired = await sync_utils.acquire_sync_lock(knowledge_id)
    if not lock_acquired:
        ctx.logger.warning(f"⏸️ Lock already held for {knowledge_id}, skipping")
        ctx.logger.info(" (Will be retried by Motia queue)")
        raise RuntimeError(f"Lock busy for {knowledge_id}")  # Motia will retry

    try:
        # Perform sync
        await sync_utils.sync_knowledge_to_xai(knowledge_id, ctx)

        ctx.logger.info(separator)
        ctx.logger.info("✅ AI KNOWLEDGE SYNC COMPLETED")
        ctx.logger.info(separator)

        # Release lock with success=True
        await sync_utils.release_sync_lock(knowledge_id, success=True)
    except Exception as e:
        ctx.logger.error(separator)
        ctx.logger.error("❌ AI KNOWLEDGE SYNC FAILED")
        ctx.logger.error(separator)
        ctx.logger.error(f"Error: {e}", exc_info=True)
        ctx.logger.error(f"Knowledge ID: {knowledge_id}")
        ctx.logger.error(separator)

        # Release lock with failure. Guard the release so that a problem
        # while releasing cannot replace the original sync error that the
        # caller (and the retry mechanism) needs to see.
        try:
            await sync_utils.release_sync_lock(
                knowledge_id,
                success=False,
                error_message=str(e),
            )
        except Exception as release_error:
            ctx.logger.error(
                f"Failed to release sync lock for {knowledge_id}: {release_error}",
                exc_info=True,
            )

        # Re-raise to let Motia retry
        raise

View File

@@ -0,0 +1,73 @@
"""VMH Webhook - AI Knowledge Update"""
from typing import Any
from motia import FlowContext, http, ApiRequest, ApiResponse
# Motia step configuration for the CAIKnowledge update webhook.
# - "triggers": exposes an HTTP POST endpoint at /vmh/webhook/aiknowledge/update.
# - "enqueues": declares the topic this step publishes to; the handler below
#   enqueues an "aiknowledge.sync" event for the entity named in the request.
config = {
"name": "VMH Webhook AI Knowledge Update",
"description": "Receives update webhooks from EspoCRM for CAIKnowledge entities",
"flows": ["vmh-aiknowledge"],
"triggers": [
http("POST", "/vmh/webhook/aiknowledge/update")
],
"enqueues": ["aiknowledge.sync"],
}
async def handler(request: ApiRequest, ctx: FlowContext[Any]) -> ApiResponse:
    """
    Accept a CAIKnowledge update webhook and enqueue a sync event for it.

    According to the original author's notes, EspoCRM calls this endpoint
    when ``activationStatus`` changes, when ``syncStatus`` changes (e.g. is
    set to 'unclean'), or when documents are linked/unlinked.

    Args:
        request: Incoming HTTP request. Its ``body`` must be a JSON object
            containing ``entity_id`` (or ``id``); ``entity_type`` and
            ``action`` are optional and only used for logging / forwarding.
        ctx: Motia flow context providing ``logger`` and ``enqueue``.

    Returns:
        ApiResponse with status 200 and the ``knowledge_id`` on success,
        400 when the body is not a JSON object or the id is missing, and
        500 when enqueueing (or anything else) fails unexpectedly.
    """
    separator = "=" * 80
    try:
        ctx.logger.info(separator)
        ctx.logger.info("🔔 AI Knowledge Update Webhook")
        ctx.logger.info(separator)

        # Extract payload
        payload = request.body

        # A missing or non-object body is a client error; without this check
        # the .get() calls below would raise and be reported as a 500.
        if not isinstance(payload, dict):
            ctx.logger.error("❌ Invalid payload: expected JSON object")
            return ApiResponse(
                status_code=400,
                body={'success': False, 'error': 'Invalid payload: expected JSON object'},
            )

        # Validate required fields
        knowledge_id = payload.get('entity_id') or payload.get('id')
        entity_type = payload.get('entity_type', 'CAIKnowledge')
        action = payload.get('action', 'update')

        if not knowledge_id:
            ctx.logger.error("❌ Missing entity_id in payload")
            return ApiResponse(
                status_code=400,
                body={'success': False, 'error': 'Missing entity_id'},
            )

        ctx.logger.info(f"📋 Entity Type: {entity_type}")
        ctx.logger.info(f"📋 Entity ID: {knowledge_id}")
        ctx.logger.info(f"📋 Action: {action}")

        # Enqueue sync event
        await ctx.enqueue({
            'topic': 'aiknowledge.sync',
            'data': {
                'knowledge_id': knowledge_id,
                'source': 'webhook',
                'action': action,
            },
        })

        ctx.logger.info(f"✅ Sync event enqueued for {knowledge_id}")
        ctx.logger.info(separator)

        return ApiResponse(
            status_code=200,
            body={'success': True, 'knowledge_id': knowledge_id},
        )
    except Exception as e:
        # Include the traceback, matching the error logging of the related
        # sync and cron handlers.
        ctx.logger.error(f"❌ Webhook error: {e}", exc_info=True)
        return ApiResponse(
            status_code=500,
            body={'success': False, 'error': str(e)},
        )