feat: Implement AI Knowledge Sync Utilities and Event Handlers
- Added AIKnowledgeActivationStatus and AIKnowledgeSyncStatus enums to models.py for managing activation and sync states.
- Introduced AIKnowledgeSync class in aiknowledge_sync_utils.py for synchronizing CAIKnowledge entities with XAI Collections, including collection lifecycle management, document synchronization, and metadata updates.
- Created a daily cron job (aiknowledge_full_sync_cron_step.py) to perform a full sync of CAIKnowledge entities.
- Developed an event handler (aiknowledge_sync_event_step.py) to synchronize CAIKnowledge entities with XAI Collections, triggered by webhooks and cron jobs.
- Implemented a webhook handler (aiknowledge_update_api_step.py) to receive updates from EspoCRM for CAIKnowledge entities and enqueue sync events.
- Enhanced xai_service.py with methods for collection management, document listing, and metadata updates.
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
"""xAI Files & Collections Service"""
|
||||
import os
|
||||
import asyncio
|
||||
import aiohttp
|
||||
from typing import Optional, List
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
from services.logging_utils import get_service_logger
|
||||
|
||||
XAI_FILES_URL = "https://api.x.ai"
|
||||
@@ -173,3 +174,392 @@ class XAIService:
|
||||
f"⚠️ Fehler beim Entfernen aus Collection {collection_id}: {e}",
|
||||
level='warn'
|
||||
)
|
||||
|
||||
# ========== Collection Management ==========
|
||||
|
||||
async def create_collection(
    self,
    name: str,
    metadata: Optional[Dict[str, str]] = None,
    field_definitions: Optional[List[Dict]] = None
) -> Dict:
    """
    Create a new xAI collection.

    POST https://management-api.x.ai/v1/collections

    Args:
        name: Collection name, sent as ``collection_name``.
        metadata: Optional metadata dict; only added to the request body
            when it is non-empty.
        field_definitions: Optional definitions of the document metadata
            fields. When ``None``, a default set of five fields is used.

    Returns:
        The decoded JSON response body. Its ``id`` value is what gets
        logged as the id of the new collection.

    Raises:
        RuntimeError: if the API answers with a status other than 200/201.
    """
    self._log(f"📚 Creating collection: {name}")

    if field_definitions is None:
        # Default schema for document metadata: the first two fields are
        # flagged for injection into chunks, the remaining three are not.
        injected_keys = ("document_name", "description")
        plain_keys = ("created_at", "modified_at", "espocrm_id")
        field_definitions = [
            {"key": key, "inject_into_chunk": True} for key in injected_keys
        ] + [
            {"key": key, "inject_into_chunk": False} for key in plain_keys
        ]

    session = await self._get_session()
    request_headers = {
        "Authorization": f"Bearer {self.management_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "collection_name": name,
        "field_definitions": field_definitions
    }
    if metadata:
        payload["metadata"] = metadata

    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections"
    async with session.post(endpoint, json=payload, headers=request_headers) as response:
        if response.status in (200, 201):
            data = await response.json()
        else:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to create collection ({response.status}): {raw}"
            )

    self._log(f"✅ Collection created: {data.get('id')}")
    return data
|
||||
|
||||
async def get_collection(self, collection_id: str) -> Optional[Dict]:
    """
    Fetch the details of a collection.

    GET https://management-api.x.ai/v1/collections/{collection_id}

    Args:
        collection_id: Id of the collection to look up.

    Returns:
        The decoded JSON response body, or ``None`` when the API answers
        with 404.

    Raises:
        RuntimeError: on any HTTP status other than 200 or 404.
    """
    self._log(f"📄 Getting collection: {collection_id}")

    session = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
    auth_headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.get(endpoint, headers=auth_headers) as response:
        status = response.status

        # A missing collection is an expected outcome, not an error.
        if status == 404:
            self._log(f"⚠️ Collection not found: {collection_id}", level='warn')
            return None

        if status != 200:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to get collection ({status}): {raw}"
            )

        data = await response.json()

    self._log(f"✅ Collection retrieved: {data.get('collection_name', 'N/A')}")
    return data
|
||||
|
||||
async def delete_collection(self, collection_id: str) -> None:
    """
    Delete an xAI collection.

    DELETE https://management-api.x.ai/v1/collections/{collection_id}

    NOTE (carried over from the original author, not verifiable from this
    code): the documents inside the collection are NOT deleted, since they
    may still belong to other collections.

    Args:
        collection_id: Id of the collection to delete.

    Raises:
        RuntimeError: if the API answers with a status other than 200/204.
    """
    self._log(f"🗑️ Deleting collection {collection_id}")

    session = await self._get_session()
    target = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
    auth_headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.delete(target, headers=auth_headers) as response:
        succeeded = response.status in (200, 204)
        if not succeeded:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to delete collection {collection_id} ({response.status}): {raw}"
            )

    self._log(f"✅ Collection deleted: {collection_id}")
|
||||
|
||||
async def list_collection_documents(self, collection_id: str) -> List[Dict]:
    """
    List the documents of a collection.

    GET https://management-api.x.ai/v1/collections/{collection_id}/documents

    The response body is accepted in two shapes: a bare JSON list, or a
    JSON object carrying the list under the ``documents`` key. A null
    ``documents`` value is treated as an empty list. Any other shape yields
    an empty list as well, but is reported with a warning so that a changed
    API contract is not mistaken for an empty collection.

    Args:
        collection_id: Id of the collection whose documents are listed.

    Returns:
        List of document objects (per the original author: with file_id,
        filename, hash and fields — not verifiable from this code).

    Raises:
        RuntimeError: if the API answers with a status other than 200.
    """
    self._log(f"📋 Listing documents in collection {collection_id}")

    session = await self._get_session()
    url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents"
    headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.get(url, headers=headers) as response:
        if response.status != 200:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to list documents ({response.status}): {raw}"
            )

        data = await response.json()

    # The API is expected to return either a list or a dict with a
    # 'documents' key.
    if isinstance(data, list):
        documents = data
    elif isinstance(data, dict) and 'documents' in data:
        # Guard against {"documents": null}, which would break len() below.
        documents = data['documents'] or []
    else:
        self._log(
            f"⚠️ Unexpected response shape while listing documents in "
            f"collection {collection_id}: {type(data).__name__}",
            level='warn'
        )
        documents = []

    self._log(f"✅ Listed {len(documents)} documents")
    return documents
|
||||
|
||||
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
    """
    Fetch the details of one document inside an xAI collection.

    GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: Id of the collection that holds the document.
        file_id: xAI file id of the document.

    Returns:
        The decoded JSON response body, or ``None`` when the API answers
        with 404. Per the original author the body looks like this
        (shape not verifiable from this code):
            {
                'file_id': 'file_xyz',
                'filename': 'document.pdf',
                'hash': 'blake3:abcd1234...',  # BLAKE3 hash
                'fields': {...}                # metadata
            }

    Raises:
        RuntimeError: on any HTTP status other than 200 or 404.
    """
    self._log(f"📄 Getting document {file_id} from collection {collection_id}")

    session = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    auth_headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.get(endpoint, headers=auth_headers) as response:
        status = response.status

        # Unlike get_collection, a missing document is not logged here.
        if status == 404:
            return None

        if status != 200:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to get document from collection ({status}): {raw}"
            )

        data = await response.json()

    self._log(f"✅ Document info retrieved: {data.get('filename', 'N/A')}")
    return data
|
||||
|
||||
async def update_document_metadata(
    self,
    collection_id: str,
    file_id: str,
    metadata: Dict[str, str]
) -> None:
    """
    Update only the metadata of a document (no file upload).

    PATCH https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: xAI collection id.
        file_id: xAI file id.
        metadata: Updated metadata fields; sent under the ``fields`` key
            of the request body.

    Raises:
        RuntimeError: if the API answers with a status other than 200/204.
    """
    self._log(f"📝 Updating metadata for document {file_id}")

    session = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    request_headers = {
        "Authorization": f"Bearer {self.management_key}",
        "Content-Type": "application/json"
    }

    async with session.patch(
        endpoint, json={"fields": metadata}, headers=request_headers
    ) as response:
        accepted = response.status in (200, 204)
        if not accepted:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to update document metadata ({response.status}): {raw}"
            )

    self._log(f"✅ Metadata updated for {file_id}")
|
||||
|
||||
# ========== High-Level Operations ==========
|
||||
|
||||
async def upload_document_with_metadata(
    self,
    collection_id: str,
    file_content: bytes,
    filename: str,
    mime_type: str,
    metadata: Dict[str, str]
) -> str:
    """
    Upload a file and add it to a collection together with its metadata.

    Step 1 uploads the file via ``self.upload_file``. Step 2 attaches it to
    the collection with
    POST https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
    sending the metadata under the ``fields`` key.

    If step 2 fails, the already uploaded file is NOT removed; a warning
    naming the possibly orphaned file id and the cause is logged and the
    original exception is re-raised.

    Args:
        collection_id: xAI collection id.
        file_content: File bytes.
        filename: File name.
        mime_type: MIME type.
        metadata: Metadata fields to attach to the document.

    Returns:
        The xAI file id of the uploaded file.

    Raises:
        RuntimeError: if adding the file to the collection is answered with
            a status other than 200/201.
        Exception: whatever ``self.upload_file`` or the HTTP session raise
            is propagated unchanged.
    """
    # Step 1: upload the file.
    file_id = await self.upload_file(file_content, filename, mime_type)

    # Step 2: add it to the collection. The metadata has to be supplied
    # with this POST.
    try:
        session = await self._get_session()
        url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
        headers = {
            "Authorization": f"Bearer {self.management_key}",
            "Content-Type": "application/json"
        }
        body = {"fields": metadata}

        async with session.post(url, json=body, headers=headers) as response:
            if response.status not in (200, 201):
                raw = await response.text()
                raise RuntimeError(
                    f"Failed to add file to collection with metadata ({response.status}): {raw}"
                )
    except Exception as e:
        # Broad on purpose: nothing is swallowed, the error is re-raised.
        # The file was uploaded but not attached to the collection, so it
        # may now be orphaned; include the cause for diagnosis.
        self._log(
            f"⚠️ Failed to add to collection, file {file_id} may be orphaned: {e}",
            level='warn'
        )
        raise
    else:
        # Kept outside the try body so that a failure while logging success
        # is not misreported as an orphaned upload.
        self._log(f"✅ File {file_id} added to collection {collection_id} with metadata")
        return file_id
|
||||
|
||||
async def verify_upload_integrity(
    self,
    collection_id: str,
    file_id: str,
    retry_attempts: int = 3
) -> Tuple[bool, Optional[str]]:
    """
    Verify an upload by reading back the document hash from the collection.

    The document is fetched via ``self.get_collection_document``. The
    upload counts as verified when the returned object carries a non-empty
    string under ``hash`` (a BLAKE3 hash according to the original author).
    Only a failing lookup is retried; a missing document or a missing hash
    ends the verification immediately.

    Args:
        collection_id: xAI collection id.
        file_id: xAI file id.
        retry_attempts: Maximum number of lookups when the lookup raises.
            Values below 1 are treated as 1, so at least one lookup is
            always made.

    Returns:
        ``(True, hash)`` on success, ``(False, None)`` otherwise. This
        method does not raise for lookup errors; they are logged.
    """
    # Always try at least once, otherwise retry_attempts <= 0 would report
    # a failure without ever asking the API.
    attempts = max(1, retry_attempts)

    for attempt in range(1, attempts + 1):
        try:
            doc_info = await self.get_collection_document(collection_id, file_id)
        except Exception as e:
            # Broad on purpose: any lookup error is treated as potentially
            # temporary and retried.
            if attempt >= attempts:
                self._log(f"❌ Verification failed after {attempts} attempts: {e}", level='error')
                return (False, None)

            delay = 2 ** attempt  # exponential backoff: 2s, 4s, 8s, ...
            self._log(
                f"⚠️ Verification failed (attempt {attempt}): {e}, retry in {delay}s",
                level='warn'
            )
            await asyncio.sleep(delay)
            continue

        if not doc_info:
            self._log(f"⚠️ Document {file_id} not found in collection", level='warn')
            return (False, None)

        blake3_hash = doc_info.get('hash')

        # A non-string hash is unusable (it cannot be sliced for the log
        # line below) and is treated like a missing one.
        if not blake3_hash or not isinstance(blake3_hash, str):
            self._log("⚠️ No hash returned by XAI API", level='warn')
            return (False, None)

        self._log(f"✅ Upload verified, BLAKE3: {blake3_hash[:32]}...")
        return (True, blake3_hash)

    # Not reachable (the loop body always returns on its last iteration);
    # kept as a defensive default.
    return (False, None)
|
||||
|
||||
def is_mime_type_supported(self, mime_type: str) -> bool:
    """
    Check whether a MIME type is in the list of types accepted for xAI.

    The comparison ignores case, surrounding whitespace and any media type
    parameters, so ``"Text/Plain; charset=utf-8"`` is matched as
    ``"text/plain"``.

    Args:
        mime_type: MIME type string, optionally with parameters.

    Returns:
        True if the type is in the supported list, False otherwise
        (including for ``None``, non-string or empty input).
    """
    if not isinstance(mime_type, str):
        return False

    # Supported MIME types; per the original author's note this list is
    # based on the XAI documentation.
    supported_types = {
        # Documents
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.oasis.opendocument.text',
        'application/epub+zip',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',

        # Text
        'text/plain',
        'text/html',
        'text/markdown',
        'text/csv',
        'text/xml',

        # Code
        'text/javascript',
        'application/json',
        'application/xml',
        'text/x-python',
        'text/x-java-source',
        'text/x-c',
        'text/x-c++src',

        # Other
        'application/zip',
    }

    # Keep only the "type/subtype" part: drop parameters such as
    # "; charset=utf-8", then normalise case and whitespace.
    essence = mime_type.partition(';')[0].strip().lower()

    return essence in supported_types
|
||||
|
||||
Reference in New Issue
Block a user