feat: Implement AI Knowledge Sync Utilities and Event Handlers

- Added AIKnowledgeActivationStatus and AIKnowledgeSyncStatus enums to models.py for managing activation and sync states.
- Introduced AIKnowledgeSync class in aiknowledge_sync_utils.py for synchronizing CAIKnowledge entities with XAI Collections, including collection lifecycle management, document synchronization, and metadata updates.
- Created a daily cron job (aiknowledge_full_sync_cron_step.py) to perform a full sync of CAIKnowledge entities.
- Developed an event handler (aiknowledge_sync_event_step.py) to synchronize CAIKnowledge entities with XAI Collections triggered by webhooks and cron jobs.
- Implemented a webhook handler (aiknowledge_update_api_step.py) to receive updates from EspoCRM for CAIKnowledge entities and enqueue sync events.
- Enhanced xai_service.py with methods for collection management, document listing, and metadata updates.
This commit is contained in:
bsiggel
2026-03-11 21:14:52 +00:00
parent a5a122b688
commit 9bbfa61b3b
7 changed files with 1366 additions and 1 deletions

View File

@@ -1,7 +1,8 @@
"""xAI Files & Collections Service"""
import os
import asyncio
import aiohttp
from typing import Optional, List
from typing import Optional, List, Dict, Tuple
from services.logging_utils import get_service_logger
XAI_FILES_URL = "https://api.x.ai"
@@ -173,3 +174,392 @@ class XAIService:
f"⚠️ Fehler beim Entfernen aus Collection {collection_id}: {e}",
level='warn'
)
# ========== Collection Management ==========
async def create_collection(
self,
name: str,
metadata: Optional[Dict[str, str]] = None,
field_definitions: Optional[List[Dict]] = None
) -> Dict:
"""
Erstellt eine neue xAI Collection.
POST https://management-api.x.ai/v1/collections
Args:
name: Collection name
metadata: Optional metadata dict
field_definitions: Optional field definitions for metadata fields
Returns:
Collection object mit 'id' field
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📚 Creating collection: {name}")
# Standard field definitions für document metadata
if field_definitions is None:
field_definitions = [
{"key": "document_name", "inject_into_chunk": True},
{"key": "description", "inject_into_chunk": True},
{"key": "created_at", "inject_into_chunk": False},
{"key": "modified_at", "inject_into_chunk": False},
{"key": "espocrm_id", "inject_into_chunk": False}
]
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json"
}
body = {
"collection_name": name,
"field_definitions": field_definitions
}
# Add metadata if provided
if metadata:
body["metadata"] = metadata
async with session.post(url, json=body, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
raise RuntimeError(
f"Failed to create collection ({response.status}): {raw}"
)
data = await response.json()
collection_id = data.get('id')
self._log(f"✅ Collection created: {collection_id}")
return data
async def get_collection(self, collection_id: str) -> Optional[Dict]:
"""
Holt Collection-Details.
GET https://management-api.x.ai/v1/collections/{collection_id}
Returns:
Collection object or None if not found
Raises:
RuntimeError: bei HTTP-Fehler (außer 404)
"""
self._log(f"📄 Getting collection: {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status == 404:
self._log(f"⚠️ Collection not found: {collection_id}", level='warn')
return None
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to get collection ({response.status}): {raw}"
)
data = await response.json()
self._log(f"✅ Collection retrieved: {data.get('collection_name', 'N/A')}")
return data
async def delete_collection(self, collection_id: str) -> None:
"""
Löscht eine XAI Collection.
DELETE https://management-api.x.ai/v1/collections/{collection_id}
NOTE: Documents in der Collection werden NICHT gelöscht!
Sie können noch in anderen Collections sein.
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"🗑️ Deleting collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.delete(url, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to delete collection {collection_id} ({response.status}): {raw}"
)
self._log(f"✅ Collection deleted: {collection_id}")
async def list_collection_documents(self, collection_id: str) -> List[Dict]:
"""
Listet alle Dokumente in einer Collection.
GET https://management-api.x.ai/v1/collections/{collection_id}/documents
Returns:
List von document objects mit file_id, filename, hash, fields
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📋 Listing documents in collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to list documents ({response.status}): {raw}"
)
data = await response.json()
# API sollte eine Liste zurückgeben oder ein dict mit 'documents' key
if isinstance(data, list):
documents = data
elif isinstance(data, dict) and 'documents' in data:
documents = data['documents']
else:
documents = []
self._log(f"✅ Listed {len(documents)} documents")
return documents
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
"""
Holt Dokument-Details aus einer XAI Collection.
GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Returns:
Dict mit document info including BLAKE3 hash:
{
'file_id': 'file_xyz',
'filename': 'document.pdf',
'hash': 'blake3:abcd1234...', # BLAKE3 Hash!
'fields': {...} # Metadata
}
Returns None if not found.
"""
self._log(f"📄 Getting document {file_id} from collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status == 404:
return None
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to get document from collection ({response.status}): {raw}"
)
data = await response.json()
self._log(f"✅ Document info retrieved: {data.get('filename', 'N/A')}")
return data
async def update_document_metadata(
self,
collection_id: str,
file_id: str,
metadata: Dict[str, str]
) -> None:
"""
Aktualisiert nur Metadaten eines Documents (kein File-Upload).
PATCH https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Args:
collection_id: XAI Collection ID
file_id: XAI file_id
metadata: Updated metadata fields
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📝 Updating metadata for document {file_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json"
}
body = {"fields": metadata}
async with session.patch(url, json=body, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to update document metadata ({response.status}): {raw}"
)
self._log(f"✅ Metadata updated for {file_id}")
# ========== High-Level Operations ==========
async def upload_document_with_metadata(
self,
collection_id: str,
file_content: bytes,
filename: str,
mime_type: str,
metadata: Dict[str, str]
) -> str:
"""
Upload file + add to collection with metadata in one operation.
Args:
collection_id: XAI Collection ID
file_content: File bytes
filename: Filename
mime_type: MIME type
metadata: Metadata fields
Returns:
XAI file_id
Raises:
RuntimeError: bei Upload/Add-Fehler
"""
# Step 1: Upload file
file_id = await self.upload_file(file_content, filename, mime_type)
try:
# Step 2: Add to collection (XAI API automatically handles metadata)
# Note: Metadata muss beim POST mit angegeben werden
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json"
}
body = {"fields": metadata}
async with session.post(url, json=body, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
raise RuntimeError(
f"Failed to add file to collection with metadata ({response.status}): {raw}"
)
self._log(f"✅ File {file_id} added to collection {collection_id} with metadata")
return file_id
except Exception as e:
# Cleanup: File wurde hochgeladen aber nicht zur Collection hinzugefügt
self._log(f"⚠️ Failed to add to collection, file {file_id} may be orphaned", level='warn')
raise
async def verify_upload_integrity(
self,
collection_id: str,
file_id: str,
retry_attempts: int = 3
) -> Tuple[bool, Optional[str]]:
"""
Verifiziert Upload-Integrität via BLAKE3 Hash von XAI.
Args:
collection_id: XAI Collection ID
file_id: XAI file_id
retry_attempts: Retry bei temporären Fehlern
Returns:
(success: bool, blake3_hash: Optional[str])
"""
for attempt in range(1, retry_attempts + 1):
try:
doc_info = await self.get_collection_document(collection_id, file_id)
if not doc_info:
self._log(f"⚠️ Document {file_id} not found in collection", level='warn')
return (False, None)
blake3_hash = doc_info.get('hash')
if not blake3_hash:
self._log(f"⚠️ No hash returned by XAI API", level='warn')
return (False, None)
self._log(f"✅ Upload verified, BLAKE3: {blake3_hash[:32]}...")
return (True, blake3_hash)
except Exception as e:
if attempt < retry_attempts:
delay = 2 ** attempt # Exponential backoff
self._log(f"⚠️ Verification failed (attempt {attempt}), retry in {delay}s", level='warn')
await asyncio.sleep(delay)
else:
self._log(f"❌ Verification failed after {retry_attempts} attempts: {e}", level='error')
return (False, None)
return (False, None)
def is_mime_type_supported(self, mime_type: str) -> bool:
"""
Prüft, ob XAI diesen MIME-Type unterstützt.
Args:
mime_type: MIME type string
Returns:
True wenn unterstützt, False sonst
"""
# Liste der unterstützten MIME-Types basierend auf XAI Dokumentation
supported_types = {
# Documents
'application/pdf',
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.oasis.opendocument.text',
'application/epub+zip',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
# Text
'text/plain',
'text/html',
'text/markdown',
'text/csv',
'text/xml',
# Code
'text/javascript',
'application/json',
'application/xml',
'text/x-python',
'text/x-java-source',
'text/x-c',
'text/x-c++src',
# Other
'application/zip',
}
# Normalisiere MIME-Type (lowercase, strip whitespace)
normalized = mime_type.lower().strip()
return normalized in supported_types