feat: Implement AI Knowledge Sync Utilities and RAGFlow Service
- Added `aiknowledge_sync_utils.py`, which provides provider-agnostic synchronization logic for CAIKnowledge entities and supports both xAI and RAGFlow.
- Introduced lifecycle management for CAIKnowledge entities with the states new, active, paused, and deactivated.
- Implemented change detection using a Blake3 hash for efficient document synchronization.
- Created `ragflow_service.py` to handle dataset and document management through the RAGFlow API.
- Added a daily cron job in `aiknowledge_daily_cron_step.py` that synchronizes active CAIKnowledge entities whose status is unclean or failed.
- Developed `aiknowledge_sync_event_step.py` to process synchronization events from webhooks and cron jobs.
This commit is contained in:
@@ -209,3 +209,106 @@ class XAIUploadUtils:
|
||||
})
|
||||
except Exception as e:
|
||||
self._log.warn(f" ⚠️ Could not remove from xAI: {e}")
|
||||
|
||||
|
||||
class XAIProviderAdapter:
    """Adapter that exposes XAIService through the provider interface
    expected by AIKnowledgeSyncUtils.

    Interface (identical to RAGFlowService):
        ensure_dataset(name, description) -> dict with 'id'
        list_documents(dataset_id) -> list[dict] with 'id', 'name'
        upload_document(dataset_id, file_content, filename, mime_type,
                        blake3_hash, espocrm_id, description,
                        advoware_art, advoware_bemerkung) -> dict with 'id'
        update_document_meta(dataset_id, doc_id, ...) -> None
        remove_document(dataset_id, doc_id) -> None
        delete_dataset(dataset_id) -> None
        is_mime_type_supported(mime_type) -> bool
    """

    def __init__(self, ctx=None):
        """Create the wrapped XAIService and a logger bound to ``ctx``."""
        # Function-scope imports: the service modules are only loaded when an
        # adapter is actually instantiated.
        from services.xai_service import XAIService
        from services.logging_utils import get_service_logger

        self._xai = XAIService(ctx)
        self._log = get_service_logger('xai_adapter', ctx)

    @staticmethod
    def _collection_id(payload):
        """Return the collection id from a service payload.

        The payload may carry the id under 'collection_id' or under 'id';
        'collection_id' wins when both are present and truthy.
        """
        return payload.get('collection_id') or payload.get('id')

    async def ensure_dataset(self, name: str, description: str = '') -> dict:
        """Create or look up an xAI collection by name.

        Args:
            name: Collection name to look up, and to create if missing.
            description: Accepted for interface compatibility; it is not
                forwarded to the xAI service by this adapter.

        Returns:
            ``{'id': collection_id, 'name': name}``.
        """
        existing = await self._xai.get_collection_by_name(name)
        if existing:
            return {'id': self._collection_id(existing), 'name': name}

        created = await self._xai.create_collection(name=name)
        return {'id': self._collection_id(created), 'name': name}

    async def list_documents(self, dataset_id: str) -> list:
        """List all documents in an xAI collection.

        Returns:
            A list of ``{'id': ..., 'name': ...}`` dicts built from each
            entry's 'file_id' and 'filename'. An empty or missing service
            result yields an empty list.
        """
        raw = await self._xai.list_collection_documents(dataset_id)
        # Tolerate a None result so callers can always iterate the return value.
        return [
            {'id': doc.get('file_id'), 'name': doc.get('filename')}
            for doc in raw or []
        ]

    async def upload_document(
        self,
        dataset_id: str,
        file_content: bytes,
        filename: str,
        mime_type: str = 'application/octet-stream',
        blake3_hash=None,
        espocrm_id=None,
        description=None,
        advoware_art=None,
        advoware_bemerkung=None,
    ) -> dict:
        """Upload a document into an xAI collection together with metadata fields.

        Only metadata entries with a truthy value are sent; empty or missing
        values are omitted from ``fields`` entirely.

        Returns:
            ``{'id': file_id, 'name': filename}`` where ``file_id`` is whatever
            the service's ``upload_to_collection`` returned.
        """
        candidates = {
            'document_name': filename,
            'espocrm_id': espocrm_id,
            'description': description,
            'advoware_art': advoware_art,
            'advoware_bemerkung': advoware_bemerkung,
            'blake3_hash': blake3_hash,
        }
        # Drop every falsy value (None, '', ...) in one pass.
        fields = {key: value for key, value in candidates.items() if value}

        file_id = await self._xai.upload_to_collection(
            collection_id=dataset_id,
            file_content=file_content,
            filename=filename,
            mime_type=mime_type,
            fields=fields,
        )
        return {'id': file_id, 'name': filename}

    async def update_document_meta(
        self,
        dataset_id: str,
        doc_id: str,
        blake3_hash=None,
        description=None,
        advoware_art=None,
        advoware_bemerkung=None,
    ) -> None:
        """No-op: xAI does not support PATCH for metadata.

        A re-upload is driven by the caller (a change of syncedMetadataHash
        leads to the full upload path). This method only logs a warning.
        """
        # NOTE(review): `warn` is kept for consistency with the rest of the
        # file; if the service logger is a stdlib logging.Logger, `warn` is a
        # deprecated alias of `warning` - confirm before changing.
        self._log.warn(
            "XAIProviderAdapter.update_document_meta: xAI unterstuetzt kein "
            "Metadaten-PATCH – kein-op. Naechster Sync loest Re-Upload aus."
        )

    async def remove_document(self, dataset_id: str, doc_id: str) -> None:
        """Remove a document from an xAI collection (the file stays in the xAI Files API)."""
        await self._xai.remove_from_collection(dataset_id, doc_id)

    async def delete_dataset(self, dataset_id: str) -> None:
        """Delete an xAI collection."""
        await self._xai.delete_collection(dataset_id)

    def is_mime_type_supported(self, mime_type: str) -> bool:
        """Return whether the wrapped XAIService supports ``mime_type``."""
        return self._xai.is_mime_type_supported(mime_type)
|
||||
|
||||
Reference in New Issue
Block a user