feat: Implement AI Knowledge Sync Utilities and RAGFlow Service
- Added `aiknowledge_sync_utils.py` for provider-agnostic synchronization logic for CAIKnowledge entities, supporting both xAI and RAGFlow. - Introduced lifecycle management for CAIKnowledge entities including states: new, active, paused, and deactivated. - Implemented change detection using Blake3 hash for efficient document synchronization. - Created `ragflow_service.py` to handle dataset and document management with RAGFlow API. - Added daily cron job in `aiknowledge_daily_cron_step.py` to synchronize active CAIKnowledge entities with unclean or failed statuses. - Developed `aiknowledge_sync_event_step.py` to process synchronization events from webhooks and cron jobs.
This commit is contained in:
89
src/steps/crm/document/aiknowledge_daily_cron_step.py
Normal file
89
src/steps/crm/document/aiknowledge_daily_cron_step.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
AI Knowledge Daily Full Sync (Cron)
|
||||
|
||||
Laueft taeglich um 02:00 Uhr.
|
||||
|
||||
Laedt alle CAIKnowledge-Entities mit activationStatus='active'
|
||||
und syncStatus IN ('unclean', 'failed') und stellt sicher,
|
||||
dass sie synchroisiert sind.
|
||||
|
||||
Emits aiknowledge.sync fuer jede betroffene Entity.
|
||||
"""
|
||||
from typing import Any
|
||||
from motia import FlowContext, cron
|
||||
|
||||
from services.espocrm import EspoCRMAPI
|
||||
from services.logging_utils import get_step_logger
|
||||
|
||||
# Motia step configuration: cron-triggered step that enqueues
# aiknowledge.sync events for stale CAIKnowledge entities.
config = {
    "name": "AI Knowledge Daily Cron",
    "description": "Taeglich: Vollsync aller unclean/failed CAIKnowledge Entities",
    "flows": ["vmh-aiknowledge"],
    "triggers": [
        cron("0 2 * * *"),  # daily at 02:00
    ],
    # Topics this step may publish to (consumed by the sync event step).
    "enqueues": ["aiknowledge.sync"],
}
|
||||
|
||||
|
||||
async def handler(event_data: Any, ctx: FlowContext[Any]) -> None:
    """
    Cron handler: enqueue one aiknowledge.sync event per entity that needs syncing.

    Queries EspoCRM for active CAIKnowledge entities whose syncStatus is
    'unclean' or 'failed' and publishes an update event for each of them.
    """
    log = get_step_logger('aiknowledge_cron', ctx)

    separator = "=" * 70
    log.info(separator)
    log.info("⏰ AI KNOWLEDGE DAILY CRON START")
    log.info(separator)

    crm = EspoCRMAPI(ctx)

    # All active knowledge bases whose sync state is unclean or failed.
    filters = [
        {
            'type': 'equals',
            'attribute': 'activationStatus',
            'value': 'active',
        },
        {
            'type': 'in',
            'attribute': 'syncStatus',
            'value': ['unclean', 'failed'],
        },
    ]

    try:
        result = await crm.list_entities('CAIKnowledge', where=filters, max_size=200)
    except Exception as e:
        # Boundary catch: log the failed query and abort this cron run.
        log.error(f"❌ EspoCRM-Abfrage fehlgeschlagen: {e}")
        return

    entities = result.get('list', [])
    total = result.get('total', len(entities))
    log.info(f"📋 {len(entities)}/{total} Entities brauchen Sync")

    enqueued = 0
    for entity in entities:
        kb_id = entity.get('id')
        if not kb_id:
            # Defensive: skip malformed records without an id.
            continue

        label = entity.get('name', kb_id)
        provider = entity.get('aiProvider', 'xai')
        status = entity.get('syncStatus', '?')

        log.info(f" → Enqueue: {label} ({provider}, status={status})")
        await ctx.enqueue({
            'topic': 'aiknowledge.sync',
            'data': {
                'knowledge_id': kb_id,
                'source': 'cron',
                'action': 'update',
            },
        })
        enqueued += 1

    log.info(f"✅ {enqueued} Sync-Events enqueued")
    log.info(separator)
||||
64
src/steps/crm/document/aiknowledge_sync_event_step.py
Normal file
64
src/steps/crm/document/aiknowledge_sync_event_step.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""
|
||||
AI Knowledge Sync Handler
|
||||
|
||||
Verarbeitet aiknowledge.sync Events (Queue).
|
||||
|
||||
Quellen:
|
||||
- Webhook: EspoCRM CAIKnowledge.afterSave
|
||||
- Cron: Taeglich 02:00 Uhr (Vollsync)
|
||||
|
||||
Lifecycle:
|
||||
new → Dataset/Collection erstellen (xAI oder RAGFlow)
|
||||
active → Dokumente syncen (Change Detection via Blake3)
|
||||
paused → Skip
|
||||
deactivated → Dataset/Collection loeschen
|
||||
"""
|
||||
from typing import Any, Dict
|
||||
from motia import FlowContext, queue
|
||||
|
||||
from services.espocrm import EspoCRMAPI
|
||||
from services.redis_client import get_redis_client
|
||||
from services.aiknowledge_sync_utils import AIKnowledgeSyncUtils
|
||||
from services.logging_utils import get_step_logger
|
||||
|
||||
# Motia step configuration: queue consumer for aiknowledge.sync events
# published by the webhook and the daily cron step.
config = {
    "name": "AI Knowledge Sync Handler",
    "description": "Synchronisiert CAIKnowledge Entities mit xAI oder RAGFlow",
    "flows": ["vmh-aiknowledge"],
    "triggers": [
        queue("aiknowledge.sync"),
    ],
    # Terminal step: publishes no further events.
    "enqueues": [],
}
|
||||
|
||||
|
||||
async def handler(event_data: Dict[str, Any], ctx: FlowContext[Any]) -> None:
    """
    Central sync handler for CAIKnowledge.

    Validates the incoming event, then delegates the actual provider-specific
    synchronization to AIKnowledgeSyncUtils.run_sync().

    event_data:
        knowledge_id (str) – EspoCRM CAIKnowledge ID
        source (str) – 'webhook' | 'cron'
        action (str) – 'create' | 'update'
    """
    step_logger = get_step_logger('aiknowledge_sync', ctx)

    knowledge_id = event_data.get('knowledge_id')
    source = event_data.get('source', 'webhook')
    action = event_data.get('action', 'update')

    # Without an entity id there is nothing to sync — drop the event.
    if not knowledge_id:
        step_logger.error("❌ Kein knowledge_id im Event")
        return

    step_logger.info("=" * 70)
    # Plain string literal: was an f-string with no placeholders (ruff F541).
    step_logger.info("🔄 AI KNOWLEDGE SYNC EVENT")
    step_logger.info(f" ID : {knowledge_id}")
    step_logger.info(f" Source: {source} | Action: {action}")
    step_logger.info("=" * 70)

    espocrm = EspoCRMAPI(ctx)
    # strict=False — presumably tolerates an unavailable Redis (best-effort
    # cache); confirm against get_redis_client's implementation.
    redis_client = get_redis_client(strict=False)
    sync = AIKnowledgeSyncUtils(espocrm, redis_client, ctx)

    await sync.run_sync(knowledge_id)
|
||||
Reference in New Issue
Block a user