feat: Implement AI Knowledge Sync Utilities and RAGFlow Service

- Added `aiknowledge_sync_utils.py` for provider-agnostic synchronization logic for CAIKnowledge entities, supporting both xAI and RAGFlow.
- Introduced lifecycle management for CAIKnowledge entities including states: new, active, paused, and deactivated.
- Implemented change detection using Blake3 hash for efficient document synchronization.
- Created `ragflow_service.py` to handle dataset and document management with RAGFlow API.
- Added daily cron job in `aiknowledge_daily_cron_step.py` to synchronize active CAIKnowledge entities with unclean or failed statuses.
- Developed `aiknowledge_sync_event_step.py` to process synchronization events from webhooks and cron jobs.
This commit is contained in:
bsiggel
2026-03-26 21:38:42 +00:00
parent 439101f35d
commit 9b2fb5ae4a
8 changed files with 1406 additions and 1 deletions

View File

@@ -336,3 +336,52 @@ def is_retryable_status_code(status_code: int) -> bool:
True wenn retryable
"""
return status_code in API_CONFIG.retry_status_codes
# ========== RAGFlow Configuration ==========
@dataclass
class RAGFlowConfig:
    """Configuration for the RAGFlow AI provider.

    Every field has a default, so ``RAGFlowConfig()`` is always constructible.
    Use :meth:`from_env` to override the connection settings from environment
    variables.
    """

    # Connection
    base_url: str = "http://192.168.1.64:9380"
    """RAGFlow server URL."""

    # Defaults
    default_chunk_method: str = "laws"
    """Default chunk method; 'laws' is optimised for legal documents."""

    # Parsing
    auto_keywords: int = 14
    """Number of automatically generated keywords per chunk."""
    auto_questions: int = 7
    """Number of automatically generated questions per chunk."""
    parse_timeout_seconds: int = 120
    """Timeout while waiting for document parsing."""
    parse_poll_interval: float = 3.0
    """Poll interval while waiting for parsing (seconds)."""

    # Meta-field keys
    meta_blake3_key: str = "blake3_hash"
    """Key for the Blake3 hash in meta_fields (change detection)."""
    meta_espocrm_id_key: str = "espocrm_id"
    """Key for the EspoCRM document ID in meta_fields."""
    meta_description_key: str = "description"
    """Key for the document description in meta_fields."""

    @classmethod
    def from_env(cls) -> 'RAGFlowConfig':
        """Load the RAGFlow config from environment variables.

        Reads ``RAGFLOW_BASE_URL`` and ``RAGFLOW_PARSE_TIMEOUT``. A variable
        that is unset or blank falls back to the corresponding class default,
        so the defaults are defined in exactly one place. All other fields
        keep their class defaults.

        Returns:
            A new ``RAGFlowConfig`` instance.

        Raises:
            ValueError: If ``RAGFLOW_PARSE_TIMEOUT`` is set to something that
                is not an integer.
        """
        # Blank values are treated like unset ones: an empty URL or timeout
        # is never a usable setting.
        base_url = os.getenv('RAGFLOW_BASE_URL', '').strip() or cls.base_url

        raw_timeout = os.getenv('RAGFLOW_PARSE_TIMEOUT', '').strip()
        if not raw_timeout:
            parse_timeout = cls.parse_timeout_seconds
        else:
            try:
                parse_timeout = int(raw_timeout)
            except ValueError as err:
                # Same exception type as a bare int() failure, but the message
                # names the offending variable.
                raise ValueError(
                    "RAGFLOW_PARSE_TIMEOUT must be an integer, "
                    f"got {raw_timeout!r}"
                ) from err

        return cls(base_url=base_url, parse_timeout_seconds=parse_timeout)
# Module-level RAGFlow configuration instance, built from environment
# variables via RAGFlowConfig.from_env() when this statement executes.
RAGFLOW_CONFIG = RAGFlowConfig.from_env()