feat: Implement AI Knowledge Sync Utilities and RAGFlow Service

- Added `aiknowledge_sync_utils.py` for provider-agnostic synchronization logic for CAIKnowledge entities, supporting both xAI and RAGFlow.
- Introduced lifecycle management for CAIKnowledge entities including states: new, active, paused, and deactivated.
- Implemented change detection using Blake3 hash for efficient document synchronization.
- Created `ragflow_service.py` to handle dataset and document management with RAGFlow API.
- Added daily cron job in `aiknowledge_daily_cron_step.py` to synchronize active CAIKnowledge entities with unclean or failed statuses.
- Developed `aiknowledge_sync_event_step.py` to process synchronization events from webhooks and cron jobs.
This commit is contained in:
bsiggel
2026-03-26 21:38:42 +00:00
parent 439101f35d
commit 9b2fb5ae4a
8 changed files with 1406 additions and 1 deletions

View File

@@ -0,0 +1,89 @@
"""
AI Knowledge Daily Full Sync (Cron)
Laueft taeglich um 02:00 Uhr.
Laedt alle CAIKnowledge-Entities mit activationStatus='active'
und syncStatus IN ('unclean', 'failed') und stellt sicher,
dass sie synchroisiert sind.
Emits aiknowledge.sync fuer jede betroffene Entity.
"""
from typing import Any
from motia import FlowContext, cron
from services.espocrm import EspoCRMAPI
from services.logging_utils import get_step_logger
config = {
"name": "AI Knowledge Daily Cron",
"description": "Taeglich: Vollsync aller unclean/failed CAIKnowledge Entities",
"flows": ["vmh-aiknowledge"],
"triggers": [
cron("0 2 * * *"), # Taeglich 02:00 Uhr
],
"enqueues": ["aiknowledge.sync"],
}
async def handler(event_data: Any, ctx: FlowContext[Any]) -> None:
    """
    Cron handler: enqueue an ``aiknowledge.sync`` event for every active
    CAIKnowledge entity whose syncStatus is 'unclean' or 'failed'.

    Args:
        event_data: Cron trigger payload (unused).
        ctx: Motia flow context; provides logging and the event queue.

    Returns:
        None. Returns early if the EspoCRM query fails.
    """
    step_logger = get_step_logger('aiknowledge_cron', ctx)
    step_logger.info("=" * 70)
    step_logger.info("⏰ AI KNOWLEDGE DAILY CRON START")
    step_logger.info("=" * 70)
    espocrm = EspoCRMAPI(ctx)
    # Fetch all active knowledge bases whose sync status is unclean or failed.
    try:
        result = await espocrm.list_entities(
            'CAIKnowledge',
            where=[
                {
                    'type': 'equals',
                    'attribute': 'activationStatus',
                    'value': 'active',
                },
                {
                    'type': 'in',
                    'attribute': 'syncStatus',
                    'value': ['unclean', 'failed'],
                },
            ],
            max_size=200,
        )
    except Exception as e:
        # Without the entity list there is nothing to enqueue; abort this run
        # and let the next daily cron pick the entities up.
        step_logger.error(f"❌ EspoCRM-Abfrage fehlgeschlagen: {e}")
        return
    entities = result.get('list', [])
    total = result.get('total', len(entities))
    step_logger.info(f"📋 {len(entities)}/{total} Entities brauchen Sync")
    enqueued = 0
    for entity in entities:
        knowledge_id = entity.get('id')
        name = entity.get('name', knowledge_id)
        provider = entity.get('aiProvider', 'xai')
        sync_status = entity.get('syncStatus', '?')
        if not knowledge_id:
            # Defensive: skip malformed records without an id.
            continue
        step_logger.info(f" → Enqueue: {name} ({provider}, status={sync_status})")
        try:
            await ctx.enqueue({
                'topic': 'aiknowledge.sync',
                'data': {
                    'knowledge_id': knowledge_id,
                    'source': 'cron',
                    'action': 'update',
                },
            })
        except Exception as e:
            # Robustness fix: one failed enqueue must not abort the remaining
            # entities; unsynced ones stay unclean/failed and are retried on
            # the next daily run.
            step_logger.error(f"❌ Enqueue fehlgeschlagen fuer {knowledge_id}: {e}")
            continue
        enqueued += 1
    step_logger.info(f"{enqueued} Sync-Events enqueued")
    step_logger.info("=" * 70)

View File

@@ -0,0 +1,64 @@
"""
AI Knowledge Sync Handler
Verarbeitet aiknowledge.sync Events (Queue).
Quellen:
- Webhook: EspoCRM CAIKnowledge.afterSave
- Cron: Taeglich 02:00 Uhr (Vollsync)
Lifecycle:
new → Dataset/Collection erstellen (xAI oder RAGFlow)
active → Dokumente syncen (Change Detection via Blake3)
paused → Skip
deactivated → Dataset/Collection loeschen
"""
from typing import Any, Dict
from motia import FlowContext, queue
from services.espocrm import EspoCRMAPI
from services.redis_client import get_redis_client
from services.aiknowledge_sync_utils import AIKnowledgeSyncUtils
from services.logging_utils import get_step_logger
config = {
"name": "AI Knowledge Sync Handler",
"description": "Synchronisiert CAIKnowledge Entities mit xAI oder RAGFlow",
"flows": ["vmh-aiknowledge"],
"triggers": [
queue("aiknowledge.sync"),
],
"enqueues": [],
}
async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None:
    """
    Central sync handler for CAIKnowledge entities.

    Args:
        event_data: Queue payload with keys:
            knowledge_id (str): EspoCRM CAIKnowledge ID (required).
            source (str): 'webhook' | 'cron' (defaults to 'webhook').
            action (str): 'create' | 'update' (defaults to 'update').
        ctx: Motia flow context; provides logging and service wiring.

    Returns:
        None. Returns early when knowledge_id is missing.
    """
    step_logger = get_step_logger('aiknowledge_sync', ctx)
    knowledge_id = event_data.get('knowledge_id')
    source = event_data.get('source', 'webhook')
    action = event_data.get('action', 'update')
    if not knowledge_id:
        # Nothing to sync without an entity id; drop the event.
        step_logger.error("❌ Kein knowledge_id im Event")
        return
    step_logger.info("=" * 70)
    # Idiom fix: was an f-string with no placeholders (ruff F541);
    # the emitted string is byte-identical.
    step_logger.info("🔄 AI KNOWLEDGE SYNC EVENT")
    step_logger.info(f"   ID    : {knowledge_id}")
    step_logger.info(f"   Source: {source} | Action: {action}")
    step_logger.info("=" * 70)
    espocrm = EspoCRMAPI(ctx)
    # strict=False: a missing Redis must not block the sync itself —
    # presumably Redis only backs the Blake3 change-detection cache; confirm.
    redis_client = get_redis_client(strict=False)
    sync = AIKnowledgeSyncUtils(espocrm, redis_client, ctx)
    await sync.run_sync(knowledge_id)