Files
motia-iii/services/config.py
bsiggel 9b2fb5ae4a feat: Implement AI Knowledge Sync Utilities and RAGFlow Service
- Added `aiknowledge_sync_utils.py` for provider-agnostic synchronization logic for CAIKnowledge entities, supporting both xAI and RAGFlow.
- Introduced lifecycle management for CAIKnowledge entities including states: new, active, paused, and deactivated.
- Implemented change detection using Blake3 hash for efficient document synchronization.
- Created `ragflow_service.py` to handle dataset and document management with RAGFlow API.
- Added daily cron job in `aiknowledge_daily_cron_step.py` to synchronize active CAIKnowledge entities with unclean or failed statuses.
- Developed `aiknowledge_sync_event_step.py` to process synchronization events from webhooks and cron jobs.
2026-03-26 21:38:42 +00:00

388 lines
9.7 KiB
Python

"""
Zentrale Konfiguration für BitByLaw Integration
Alle Magic Numbers und Strings sind hier zentralisiert.
"""
from typing import List, Dict
from dataclasses import dataclass
import os
# ========== Sync Configuration ==========
@dataclass
class SyncConfig:
"""Konfiguration für Sync-Operationen"""
# Retry-Konfiguration
max_retries: int = 5
"""Maximale Anzahl von Retry-Versuchen"""
retry_backoff_minutes: List[int] = None
"""Exponential Backoff in Minuten: [1, 5, 15, 60, 240]"""
auto_reset_hours: int = 24
"""Auto-Reset für permanently_failed Entities (in Stunden)"""
# Lock-Konfiguration
lock_ttl_seconds: int = 900 # 15 Minuten
"""TTL für distributed locks (verhindert Deadlocks)"""
lock_prefix: str = "sync_lock"
"""Prefix für Redis Lock Keys"""
# Validation
validate_before_sync: bool = True
"""Validiere Entities vor dem Sync (empfohlen)"""
# Change Detection
use_rowid_change_detection: bool = True
"""Nutze rowId für Change Detection (Advoware)"""
def __post_init__(self):
if self.retry_backoff_minutes is None:
# Default exponential backoff: 1, 5, 15, 60, 240 Minuten
self.retry_backoff_minutes = [1, 5, 15, 60, 240]
# Singleton Instance
SYNC_CONFIG = SyncConfig()
# ========== API Configuration ==========
@dataclass
class APIConfig:
"""API-spezifische Konfiguration"""
# Timeouts
default_timeout_seconds: int = 30
"""Default Timeout für API-Calls"""
long_running_timeout_seconds: int = 120
"""Timeout für lange Operations (z.B. Uploads)"""
# Retry
max_api_retries: int = 3
"""Anzahl Retries bei API-Fehlern"""
retry_status_codes: List[int] = None
"""HTTP Status Codes die Retry auslösen"""
# Rate Limiting
rate_limit_enabled: bool = True
"""Aktiviere Rate Limiting"""
rate_limit_calls_per_minute: int = 60
"""Max. API-Calls pro Minute"""
def __post_init__(self):
if self.retry_status_codes is None:
# Retry bei: 408 (Timeout), 429 (Rate Limit), 500, 502, 503, 504
self.retry_status_codes = [408, 429, 500, 502, 503, 504]
API_CONFIG = APIConfig()
# ========== Advoware Configuration ==========
@dataclass
class AdvowareConfig:
"""Advoware-spezifische Konfiguration"""
# Token Management
token_lifetime_minutes: int = 55
"""Token-Lifetime (tatsächlich 60min, aber 5min Puffer)"""
token_cache_key: str = "advoware_access_token"
"""Redis Key für Token Cache"""
token_timestamp_key: str = "advoware_token_timestamp"
"""Redis Key für Token Timestamp"""
# Auth
auth_url: str = "https://security.advo-net.net/api/v1/Token"
"""Advoware Auth-Endpoint"""
product_id: int = 64
"""Advoware Product ID"""
# Field Mapping
readonly_fields: List[str] = None
"""Felder die nicht via PUT geändert werden können"""
def __post_init__(self):
if self.readonly_fields is None:
# Diese Felder können nicht via PUT geändert werden
self.readonly_fields = [
'betNr', 'rowId', 'kommKz', # Kommunikation: kommKz ist read-only!
'handelsRegisterNummer', 'registergericht' # Werden ignoriert von API
]
ADVOWARE_CONFIG = AdvowareConfig()
# ========== EspoCRM Configuration ==========
@dataclass
class EspoCRMConfig:
"""EspoCRM-spezifische Konfiguration"""
# API
default_page_size: int = 50
"""Default Seitengröße für Listen-Abfragen"""
max_page_size: int = 200
"""Maximale Seitengröße"""
# Sync Status Fields
sync_status_field: str = "syncStatus"
"""Feldname für Sync-Status"""
sync_error_field: str = "syncErrorMessage"
"""Feldname für Sync-Fehler"""
sync_retry_field: str = "syncRetryCount"
"""Feldname für Retry-Counter"""
# Notifications
notification_enabled: bool = True
"""In-App Notifications aktivieren"""
notification_user_id: str = "1"
"""User-ID für Notifications (Marvin)"""
ESPOCRM_CONFIG = EspoCRMConfig()
# ========== Redis Configuration ==========
@dataclass
class RedisConfig:
"""Redis-spezifische Konfiguration"""
# Connection
host: str = "localhost"
port: int = 6379
db: int = 1
timeout_seconds: int = 5
max_connections: int = 50
# Behavior
decode_responses: bool = True
"""Auto-decode bytes zu strings"""
health_check_interval: int = 30
"""Health-Check Interval in Sekunden"""
# Keys
key_prefix: str = "bitbylaw"
"""Prefix für alle Redis Keys"""
def get_key(self, key: str) -> str:
"""Gibt vollen Redis Key mit Prefix zurück"""
return f"{self.key_prefix}:{key}"
@classmethod
def from_env(cls) -> 'RedisConfig':
"""Lädt Redis-Config aus Environment Variables"""
return cls(
host=os.getenv('REDIS_HOST', 'localhost'),
port=int(os.getenv('REDIS_PORT', '6379')),
db=int(os.getenv('REDIS_DB_ADVOWARE_CACHE', '1')),
timeout_seconds=int(os.getenv('REDIS_TIMEOUT_SECONDS', '5')),
max_connections=int(os.getenv('REDIS_MAX_CONNECTIONS', '50'))
)
REDIS_CONFIG = RedisConfig.from_env()
# ========== Logging Configuration ==========
@dataclass
class LoggingConfig:
"""Logging-Konfiguration"""
# Levels
default_level: str = "INFO"
"""Default Log-Level"""
api_level: str = "INFO"
"""Log-Level für API-Calls"""
sync_level: str = "INFO"
"""Log-Level für Sync-Operations"""
# Format
log_format: str = "[{timestamp}] {level} {logger}: {message}"
"""Log-Format"""
include_context: bool = True
"""Motia FlowContext in Logs einbinden"""
# Performance
log_api_timings: bool = True
"""API Call Timings loggen"""
log_sync_duration: bool = True
"""Sync-Dauer loggen"""
LOGGING_CONFIG = LoggingConfig()
# ========== Calendar Sync Configuration ==========
@dataclass
class CalendarSyncConfig:
"""Konfiguration für Google Calendar Sync"""
# Sync Window
sync_days_past: int = 7
"""Tage in die Vergangenheit syncen"""
sync_days_future: int = 90
"""Tage in die Zukunft syncen"""
# Cron
cron_schedule: str = "0 */15 * * * *"
"""Cron-Schedule (jede 15 Minuten)"""
# Batch Size
batch_size: int = 10
"""Anzahl Mitarbeiter pro Batch"""
CALENDAR_SYNC_CONFIG = CalendarSyncConfig()
# ========== Feature Flags ==========
@dataclass
class FeatureFlags:
"""Feature Flags für schrittweises Rollout"""
# Validation
strict_validation: bool = True
"""Strenge Validierung mit Pydantic"""
# Sync Features
kommunikation_sync_enabled: bool = False
"""Kommunikation-Sync aktivieren (noch in Entwicklung)"""
document_sync_enabled: bool = False
"""Document-Sync aktivieren (noch in Entwicklung)"""
# Advanced Features
parallel_sync_enabled: bool = False
"""Parallele Sync-Operations (experimentell)"""
auto_conflict_resolution: bool = False
"""Automatische Konfliktauflösung (experimentell)"""
# Debug
debug_mode: bool = False
"""Debug-Modus (mehr Logging, langsamer)"""
FEATURE_FLAGS = FeatureFlags()
# ========== Helper Functions ==========
def get_retry_delay_seconds(attempt: int) -> int:
"""
Gibt Retry-Delay in Sekunden für gegebenen Versuch zurück.
Args:
attempt: Versuchs-Nummer (0-indexed)
Returns:
Delay in Sekunden
"""
backoff_minutes = SYNC_CONFIG.retry_backoff_minutes
if attempt < len(backoff_minutes):
return backoff_minutes[attempt] * 60
return backoff_minutes[-1] * 60
def get_lock_key(entity_type: str, entity_id: str) -> str:
"""
Erzeugt Redis Lock-Key für Entity.
Args:
entity_type: Entity-Typ (z.B. 'cbeteiligte')
entity_id: Entity-ID
Returns:
Redis Key
"""
return f"{SYNC_CONFIG.lock_prefix}:{entity_type.lower()}:{entity_id}"
def is_retryable_status_code(status_code: int) -> bool:
"""
Prüft ob HTTP Status Code Retry auslösen soll.
Args:
status_code: HTTP Status Code
Returns:
True wenn retryable
"""
return status_code in API_CONFIG.retry_status_codes
# ========== RAGFlow Configuration ==========
@dataclass
class RAGFlowConfig:
"""Konfiguration für RAGFlow AI Provider"""
# Connection
base_url: str = "http://192.168.1.64:9380"
"""RAGFlow Server URL"""
# Defaults
default_chunk_method: str = "laws"
"""Standard Chunk-Methode: 'laws' optimiert fuer Rechtsdokumente"""
# Parsing
auto_keywords: int = 14
"""Anzahl automatisch generierter Keywords pro Chunk"""
auto_questions: int = 7
"""Anzahl automatisch generierter Fragen pro Chunk"""
parse_timeout_seconds: int = 120
"""Timeout beim Warten auf Document-Parsing"""
parse_poll_interval: float = 3.0
"""Poll-Interval beim Warten auf Parsing (Sekunden)"""
# Meta-Fields Keys
meta_blake3_key: str = "blake3_hash"
"""Key für Blake3-Hash in meta_fields (Change Detection)"""
meta_espocrm_id_key: str = "espocrm_id"
"""Key für EspoCRM Document ID in meta_fields"""
meta_description_key: str = "description"
"""Key für Dokument-Beschreibung in meta_fields"""
@classmethod
def from_env(cls) -> 'RAGFlowConfig':
"""Lädt RAGFlow-Config aus Environment Variables"""
return cls(
base_url=os.getenv('RAGFLOW_BASE_URL', 'http://192.168.1.64:9380'),
parse_timeout_seconds=int(os.getenv('RAGFLOW_PARSE_TIMEOUT', '120')),
)
RAGFLOW_CONFIG = RAGFlowConfig.from_env()