# Changelog:
# - Added `aiknowledge_sync_utils.py` for provider-agnostic synchronization logic
#   for CAIKnowledge entities, supporting both xAI and RAGFlow.
# - Introduced lifecycle management for CAIKnowledge entities including states:
#   new, active, paused, and deactivated.
# - Implemented change detection using a Blake3 hash for efficient document
#   synchronization.
# - Created `ragflow_service.py` to handle dataset and document management with
#   the RAGFlow API.
# - Added a daily cron job in `aiknowledge_daily_cron_step.py` to synchronize
#   active CAIKnowledge entities with unclean or failed statuses.
# - Developed `aiknowledge_sync_event_step.py` to process synchronization events
#   from webhooks and cron jobs.
"""
|
|
Zentrale Konfiguration für BitByLaw Integration
|
|
|
|
Alle Magic Numbers und Strings sind hier zentralisiert.
|
|
"""
import os
from dataclasses import dataclass, field
from typing import Dict, List
# ========== Sync Configuration ==========

@dataclass
class SyncConfig:
    """Configuration for sync operations.

    Central place for the retry, locking, validation, and change-detection
    settings used by the synchronization pipeline.
    """

    # --- Retry configuration ---
    max_retries: int = 5  # maximum number of retry attempts

    # Exponential backoff in minutes. `field(default_factory=...)` replaces the
    # former `= None` + __post_init__ workaround, which mistyped the attribute
    # (List[int] cannot be None); behavior is unchanged, each instance still
    # gets its own fresh list.
    retry_backoff_minutes: List[int] = field(
        default_factory=lambda: [1, 5, 15, 60, 240]
    )

    # Auto-reset for permanently_failed entities (in hours)
    auto_reset_hours: int = 24

    # --- Lock configuration ---
    lock_ttl_seconds: int = 900  # 15 minutes; TTL for distributed locks (prevents deadlocks)
    lock_prefix: str = "sync_lock"  # prefix for Redis lock keys

    # --- Validation ---
    validate_before_sync: bool = True  # validate entities before sync (recommended)

    # --- Change detection ---
    use_rowid_change_detection: bool = True  # use rowId for change detection (Advoware)


# Singleton instance
SYNC_CONFIG = SyncConfig()
# ========== API Configuration ==========

@dataclass
class APIConfig:
    """API-specific configuration (timeouts, retries, rate limiting)."""

    # --- Timeouts ---
    default_timeout_seconds: int = 30        # default timeout for API calls
    long_running_timeout_seconds: int = 120  # timeout for long operations (e.g. uploads)

    # --- Retry ---
    max_api_retries: int = 3  # number of retries on API errors

    # HTTP status codes that trigger a retry:
    # 408 (timeout), 429 (rate limit), 500, 502, 503, 504.
    # `field(default_factory=...)` fixes the mistyped `List[int] = None`
    # default and removes the need for __post_init__; behavior is unchanged.
    retry_status_codes: List[int] = field(
        default_factory=lambda: [408, 429, 500, 502, 503, 504]
    )

    # --- Rate limiting ---
    rate_limit_enabled: bool = True         # enable rate limiting
    rate_limit_calls_per_minute: int = 60   # max API calls per minute


API_CONFIG = APIConfig()
# ========== Advoware Configuration ==========

@dataclass
class AdvowareConfig:
    """Advoware-specific configuration (token management, auth, field mapping)."""

    # --- Token management ---
    token_lifetime_minutes: int = 55  # actual lifetime is 60 min; 5 min safety buffer
    token_cache_key: str = "advoware_access_token"  # Redis key for the token cache
    token_timestamp_key: str = "advoware_token_timestamp"  # Redis key for the token timestamp

    # --- Auth ---
    auth_url: str = "https://security.advo-net.net/api/v1/Token"  # Advoware auth endpoint
    product_id: int = 64  # Advoware product ID

    # --- Field mapping ---
    # Fields that cannot be changed via PUT. `field(default_factory=...)`
    # fixes the mistyped `List[str] = None` default and replaces the former
    # __post_init__ workaround; behavior is unchanged.
    readonly_fields: List[str] = field(
        default_factory=lambda: [
            'betNr', 'rowId', 'kommKz',  # communication: kommKz is read-only!
            'handelsRegisterNummer', 'registergericht',  # ignored by the API
        ]
    )


ADVOWARE_CONFIG = AdvowareConfig()
# ========== EspoCRM Configuration ==========

@dataclass
class EspoCRMConfig:
    """EspoCRM-specific configuration (paging, sync-status fields, notifications)."""

    # --- API paging ---
    default_page_size: int = 50  # default page size for list queries
    max_page_size: int = 200     # maximum page size

    # --- Sync status fields (EspoCRM field names) ---
    sync_status_field: str = "syncStatus"       # field holding the sync status
    sync_error_field: str = "syncErrorMessage"  # field holding the last sync error
    sync_retry_field: str = "syncRetryCount"    # field holding the retry counter

    # --- Notifications ---
    notification_enabled: bool = True  # enable in-app notifications
    notification_user_id: str = "1"    # user ID for notifications (Marvin)


ESPOCRM_CONFIG = EspoCRMConfig()
# ========== Redis Configuration ==========

@dataclass
class RedisConfig:
    """Redis-specific configuration (connection, behavior, key namespace)."""

    # --- Connection ---
    host: str = "localhost"
    port: int = 6379
    db: int = 1
    timeout_seconds: int = 5
    max_connections: int = 50

    # --- Behavior ---
    decode_responses: bool = True    # auto-decode bytes to strings
    health_check_interval: int = 30  # health-check interval in seconds

    # --- Keys ---
    key_prefix: str = "bitbylaw"  # prefix for all Redis keys

    def get_key(self, key: str) -> str:
        """Return the full Redis key, namespaced with the configured prefix."""
        return f"{self.key_prefix}:{key}"

    @classmethod
    def from_env(cls) -> 'RedisConfig':
        """Build a RedisConfig from environment variables (with safe defaults)."""
        env = os.getenv
        return cls(
            host=env('REDIS_HOST', 'localhost'),
            port=int(env('REDIS_PORT', '6379')),
            db=int(env('REDIS_DB_ADVOWARE_CACHE', '1')),
            timeout_seconds=int(env('REDIS_TIMEOUT_SECONDS', '5')),
            max_connections=int(env('REDIS_MAX_CONNECTIONS', '50')),
        )


REDIS_CONFIG = RedisConfig.from_env()
# ========== Logging Configuration ==========

@dataclass
class LoggingConfig:
    """Logging configuration (levels, format, performance logging)."""

    # --- Levels ---
    default_level: str = "INFO"  # default log level
    api_level: str = "INFO"      # log level for API calls
    sync_level: str = "INFO"     # log level for sync operations

    # --- Format ---
    log_format: str = "[{timestamp}] {level} {logger}: {message}"  # log line layout
    include_context: bool = True  # include the Motia FlowContext in log records

    # --- Performance ---
    log_api_timings: bool = True    # log API call timings
    log_sync_duration: bool = True  # log sync duration


LOGGING_CONFIG = LoggingConfig()
# ========== Calendar Sync Configuration ==========

@dataclass
class CalendarSyncConfig:
    """Configuration for the Google Calendar sync."""

    # --- Sync window ---
    sync_days_past: int = 7     # days in the past to sync
    sync_days_future: int = 90  # days in the future to sync

    # --- Cron ---
    # Six-field schedule (seconds first): fires at second 0 every 15 minutes.
    cron_schedule: str = "0 */15 * * * *"

    # --- Batch size ---
    batch_size: int = 10  # number of employees per batch


CALENDAR_SYNC_CONFIG = CalendarSyncConfig()
# ========== Feature Flags ==========

@dataclass
class FeatureFlags:
    """Feature flags for gradual rollout."""

    # --- Validation ---
    strict_validation: bool = True  # strict validation with Pydantic

    # --- Sync features ---
    kommunikation_sync_enabled: bool = False  # communication sync (still in development)
    document_sync_enabled: bool = False       # document sync (still in development)

    # --- Advanced features ---
    parallel_sync_enabled: bool = False     # parallel sync operations (experimental)
    auto_conflict_resolution: bool = False  # automatic conflict resolution (experimental)

    # --- Debug ---
    debug_mode: bool = False  # debug mode (more logging, slower)


FEATURE_FLAGS = FeatureFlags()
# ========== Helper Functions ==========

def get_retry_delay_seconds(attempt: int) -> int:
    """Return the retry delay in seconds for the given attempt.

    Args:
        attempt: Attempt number (0-indexed). Values beyond the configured
            backoff table are clamped to the last entry. Negative values are
            clamped to the first entry — previously a negative attempt
            silently indexed from the end of the list, returning the
            maximum delay instead of the minimum.

    Returns:
        Delay in seconds.
    """
    backoff_minutes = SYNC_CONFIG.retry_backoff_minutes
    index = min(max(attempt, 0), len(backoff_minutes) - 1)
    return backoff_minutes[index] * 60
def get_lock_key(entity_type: str, entity_id: str) -> str:
    """Build the Redis lock key for an entity.

    Args:
        entity_type: Entity type (e.g. 'cbeteiligte'); lower-cased in the key.
        entity_id: Entity ID.

    Returns:
        The Redis key string.
    """
    parts = (SYNC_CONFIG.lock_prefix, entity_type.lower(), entity_id)
    return ":".join(parts)
def is_retryable_status_code(status_code: int) -> bool:
    """Check whether an HTTP status code should trigger a retry.

    Args:
        status_code: HTTP status code.

    Returns:
        True if the code is in the configured set of retryable codes.
    """
    retryable = API_CONFIG.retry_status_codes
    return status_code in retryable
# ========== RAGFlow Configuration ==========

@dataclass
class RAGFlowConfig:
    """Configuration for the RAGFlow AI provider."""

    # --- Connection ---
    base_url: str = "http://192.168.1.64:9380"  # RAGFlow server URL

    # --- Defaults ---
    default_chunk_method: str = "laws"  # chunking method optimized for legal documents

    # --- Parsing ---
    auto_keywords: int = 14           # auto-generated keywords per chunk
    auto_questions: int = 7           # auto-generated questions per chunk
    parse_timeout_seconds: int = 120  # timeout while waiting for document parsing
    parse_poll_interval: float = 3.0  # poll interval while waiting for parsing (seconds)

    # --- Meta-field keys ---
    meta_blake3_key: str = "blake3_hash"       # key for the Blake3 hash (change detection)
    meta_espocrm_id_key: str = "espocrm_id"    # key for the EspoCRM document ID
    meta_description_key: str = "description"  # key for the document description

    @classmethod
    def from_env(cls) -> 'RAGFlowConfig':
        """Build a RAGFlowConfig from environment variables (with safe defaults)."""
        return cls(
            base_url=os.getenv('RAGFLOW_BASE_URL', 'http://192.168.1.64:9380'),
            parse_timeout_seconds=int(os.getenv('RAGFLOW_PARSE_TIMEOUT', '120')),
        )


RAGFLOW_CONFIG = RAGFlowConfig.from_env()