507 lines
18 KiB
Python
507 lines
18 KiB
Python
"""xAI Files & Collections Service"""
|
||
import os
|
||
import asyncio
|
||
import aiohttp
|
||
from typing import Optional, List, Dict, Tuple
|
||
from services.logging_utils import get_service_logger
|
||
|
||
# Base URL of the xAI Files API; upload_file() posts to "<base>/v1/files".
XAI_FILES_URL = "https://api.x.ai"
|
||
# Base URL of the xAI Management API; all "/v1/collections..." requests use it.
XAI_MANAGEMENT_URL = "https://management-api.x.ai"
|
||
|
||
|
||
class XAIService:
|
||
"""
|
||
Client für xAI Files API und Collections Management API.
|
||
|
||
Benötigte Umgebungsvariablen:
|
||
- XAI_API_KEY – regulärer API-Key für File-Uploads (api.x.ai)
|
||
- XAI_MANAGEMENT_KEY – Management-API-Key für Collection-Operationen (management-api.x.ai)
|
||
"""
|
||
|
||
def __init__(self, ctx=None):
    """Read both API keys from the environment and set up logging.

    Args:
        ctx: Optional context object; stored on the instance and passed to
            the service-logger factory.

    Raises:
        ValueError: If XAI_API_KEY or XAI_MANAGEMENT_KEY is unset or empty.
    """
    environment = os.environ
    self.api_key = environment.get('XAI_API_KEY', '')
    self.management_key = environment.get('XAI_MANAGEMENT_KEY', '')
    self.ctx = ctx
    self.logger = get_service_logger('xai', ctx)
    # No HTTP session yet; _get_session() creates one on first use.
    self._session: Optional[aiohttp.ClientSession] = None

    # The regular API key is validated first, then the management key.
    api_key_missing = not self.api_key
    if api_key_missing:
        raise ValueError("XAI_API_KEY not configured in environment")

    management_key_missing = not self.management_key
    if management_key_missing:
        raise ValueError("XAI_MANAGEMENT_KEY not configured in environment")
|
||
|
||
def _log(self, msg: str, level: str = 'info') -> None:
|
||
"""Delegate logging to service logger"""
|
||
log_func = getattr(self.logger, level, self.logger.info)
|
||
log_func(msg)
|
||
|
||
async def _get_session(self) -> aiohttp.ClientSession:
    """Return the instance's aiohttp session, creating one when needed.

    A new session (total timeout of 120 seconds) is created when none
    exists yet or the stored one has been closed.
    """
    existing = self._session
    if existing is not None and not existing.closed:
        return existing

    self._session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=120)
    )
    return self._session
|
||
|
||
async def close(self) -> None:
    """Close the stored HTTP session if there is one and it is still open."""
    session = self._session
    if not session or session.closed:
        # Nothing to do: never created, or already closed.
        return
    await session.close()
|
||
|
||
async def upload_file(
    self,
    file_content: bytes,
    filename: str,
    mime_type: str = 'application/octet-stream'
) -> str:
    """Upload a file to the xAI Files API as multipart/form-data.

    POST https://api.x.ai/v1/files

    Args:
        file_content: Raw bytes of the file.
        filename: File name sent with the multipart part.
        mime_type: Content type sent with the multipart part.

    Returns:
        The xAI file id, read from ``id`` (or ``file_id``) of the response.

    Raises:
        RuntimeError: On any status other than 200/201, or when the
            response does not contain a file id.
    """
    self._log(f"📤 Uploading {len(file_content)} bytes to xAI: {filename}")

    session = await self._get_session()
    url = f"{XAI_FILES_URL}/v1/files"
    headers = {"Authorization": f"Bearer {self.api_key}"}

    form = aiohttp.FormData()
    form.add_field('file', file_content, filename=filename, content_type=mime_type)

    async with session.post(url, data=form, headers=headers) as response:
        try:
            data = await response.json()
        except Exception:
            # Deliberate best effort: if the body cannot be parsed as JSON,
            # keep the raw text so it still shows up in the error message.
            raw = await response.text()
            data = {"_raw": raw}

        if response.status not in (200, 201):
            raise RuntimeError(
                f"xAI file upload failed ({response.status}): {data}"
            )

        # A JSON body that is not an object (list, string, null) has no
        # .get(); treat it as "no file id" so the documented RuntimeError is
        # raised instead of an AttributeError/TypeError.
        file_id = None
        if isinstance(data, dict):
            file_id = data.get('id') or data.get('file_id')

        if not file_id:
            raise RuntimeError(
                f"No file_id in xAI upload response: {data}"
            )

    self._log(f"✅ xAI file uploaded: {file_id}")
    return file_id
|
||
|
||
async def add_to_collection(self, collection_id: str, file_id: str) -> None:
    """Attach an already uploaded file to one xAI collection.

    POST https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: Target collection.
        file_id: xAI file id to attach.

    Raises:
        RuntimeError: When the response status is neither 200 nor 201.
    """
    self._log(f"📚 Adding file {file_id} to collection {collection_id}")

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    request_headers = {
        "Authorization": f"Bearer {self.management_key}",
        "Content-Type": "application/json",
    }

    # The request carries no body; both ids are part of the URL.
    async with http.post(endpoint, headers=request_headers) as resp:
        accepted = resp.status in (200, 201)
        if not accepted:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to add file to collection {collection_id} ({resp.status}): {body_text}"
            )

    self._log(f"✅ File {file_id} added to collection {collection_id}")
|
||
|
||
async def remove_from_collection(self, collection_id: str, file_id: str) -> None:
    """Detach a file from one xAI collection.

    Only the collection membership is removed via this endpoint; this method
    issues no request against the Files API.

    DELETE https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: Collection to remove the file from.
        file_id: xAI file id to detach.

    Raises:
        RuntimeError: When the response status is neither 200 nor 204.
    """
    self._log(f"🗑️ Removing file {file_id} from collection {collection_id}")

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    auth_header = {"Authorization": f"Bearer {self.management_key}"}

    async with http.delete(endpoint, headers=auth_header) as resp:
        accepted = resp.status in (200, 204)
        if not accepted:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to remove file from collection {collection_id} ({resp.status}): {body_text}"
            )

    self._log(f"✅ File {file_id} removed from collection {collection_id}")
|
||
|
||
async def add_to_collections(self, collection_ids: List[str], file_id: str) -> List[str]:
    """Attach one file to several collections, one after another.

    A failure for one collection is logged at level 'warn' and does not
    stop processing of the remaining collections.

    Args:
        collection_ids: Collections to attach the file to, in order.
        file_id: xAI file id to attach.

    Returns:
        The ids of the collections the file was attached to successfully,
        in input order.
    """
    succeeded = []
    for target in collection_ids:
        try:
            await self.add_to_collection(target, file_id)
        except Exception as err:
            # Best effort per collection: report and move on.
            self._log(
                f"⚠️ Fehler beim Hinzufügen zu Collection {target}: {err}",
                level='warn'
            )
        else:
            succeeded.append(target)
    return succeeded
|
||
|
||
async def remove_from_collections(self, collection_ids: List[str], file_id: str) -> None:
    """Detach one file from several collections, one after another.

    A failure for one collection is logged at level 'warn' and otherwise
    ignored, so the remaining collections are still processed.

    Args:
        collection_ids: Collections to remove the file from, in order.
        file_id: xAI file id to detach.
    """
    for target in collection_ids:
        try:
            await self.remove_from_collection(target, file_id)
        except Exception as err:
            # Best effort per collection: report and continue with the next.
            warning = f"⚠️ Fehler beim Entfernen aus Collection {target}: {err}"
            self._log(warning, level='warn')
|
||
|
||
# ========== Collection Management ==========
|
||
|
||
async def create_collection(
    self,
    name: str,
    metadata: Optional[Dict[str, str]] = None,
    field_definitions: Optional[List[Dict]] = None
) -> Dict:
    """Create a new xAI collection.

    POST https://management-api.x.ai/v1/collections

    Args:
        name: Collection name, sent as ``collection_name``.
        metadata: Optional metadata dict; only sent when non-empty.
        field_definitions: Optional definitions of the document metadata
            fields. When None, a default set of five fields is used.

    Returns:
        The parsed JSON response body, returned unchanged.

    Raises:
        RuntimeError: When the response status is neither 200 nor 201.
    """
    self._log(f"📚 Creating collection: {name}")

    if field_definitions is None:
        # Default document metadata fields as (key, inject_into_chunk) pairs.
        default_fields = (
            ("document_name", True),
            ("description", True),
            ("created_at", False),
            ("modified_at", False),
            ("espocrm_id", False),
        )
        field_definitions = [
            {"key": field_key, "inject_into_chunk": inject}
            for field_key, inject in default_fields
        ]

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections"
    request_headers = {
        "Authorization": f"Bearer {self.management_key}",
        "Content-Type": "application/json"
    }

    payload = {"collection_name": name, "field_definitions": field_definitions}
    if metadata:
        payload["metadata"] = metadata

    async with http.post(endpoint, json=payload, headers=request_headers) as resp:
        accepted = resp.status in (200, 201)
        if not accepted:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to create collection ({resp.status}): {body_text}"
            )

        created = await resp.json()

    # The id is looked up under 'collection_id' first, with 'id' as fallback;
    # it is only used for the log line.
    new_id = created.get('collection_id') or created.get('id')
    self._log(f"✅ Collection created: {new_id}")
    return created
|
||
|
||
async def get_collection(self, collection_id: str) -> Optional[Dict]:
    """Fetch the details of one collection.

    GET https://management-api.x.ai/v1/collections/{collection_id}

    Args:
        collection_id: Collection to look up.

    Returns:
        The parsed JSON response body, or None when the API answers 404.

    Raises:
        RuntimeError: On any status other than 200 or 404.
    """
    self._log(f"📄 Getting collection: {collection_id}")

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
    auth_header = {"Authorization": f"Bearer {self.management_key}"}

    async with http.get(endpoint, headers=auth_header) as resp:
        # 404 is an expected outcome and reported as None, not as an error.
        if resp.status == 404:
            self._log(f"⚠️ Collection not found: {collection_id}", level='warn')
            return None

        if resp.status != 200:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to get collection ({resp.status}): {body_text}"
            )

        details = await resp.json()

    self._log(f"✅ Collection retrieved: {details.get('collection_name', 'N/A')}")
    return details
|
||
|
||
async def delete_collection(self, collection_id: str) -> None:
    """Delete one xAI collection.

    DELETE https://management-api.x.ai/v1/collections/{collection_id}

    Only the collection endpoint is called; this method sends no request
    that deletes the files themselves.
    NOTE(review): the original docstring states that documents survive the
    deletion because they may belong to other collections - confirm against
    the API documentation.

    Args:
        collection_id: Collection to delete.

    Raises:
        RuntimeError: When the response status is neither 200 nor 204.
    """
    self._log(f"🗑️ Deleting collection {collection_id}")

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
    auth_header = {"Authorization": f"Bearer {self.management_key}"}

    async with http.delete(endpoint, headers=auth_header) as resp:
        accepted = resp.status in (200, 204)
        if not accepted:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to delete collection {collection_id} ({resp.status}): {body_text}"
            )

    self._log(f"✅ Collection deleted: {collection_id}")
|
||
|
||
async def list_collection_documents(self, collection_id: str) -> List[Dict]:
    """List the documents of a collection in a flattened, normalized form.

    GET https://management-api.x.ai/v1/collections/{collection_id}/documents

    The response body may be a bare list or a dict with a 'documents' key;
    any other shape yields an empty result. Each entry's nested
    'file_metadata' object is flattened to top-level keys.

    NOTE(review): only a single request is made; if the endpoint paginates,
    later pages are not fetched - confirm against the API documentation.

    Args:
        collection_id: Collection whose documents are listed.

    Returns:
        A list of dicts, one per document, with the keys:
            'file_id':      file_metadata.file_id
            'filename':     file_metadata.name
            'blake3_hash':  file_metadata.hash (per the original note a
                            plain hex string without prefix - not verified
                            here)
            'size_bytes':   file_metadata.size_bytes as int, 0 when missing
            'content_type': file_metadata.content_type
            'created_at':   file_metadata.created_at
            'fields':       custom metadata dict ({} when missing)
            'status':       document status as reported by the API

    Raises:
        RuntimeError: When the response status is not 200.
    """
    self._log(f"📋 Listing documents in collection {collection_id}")

    session = await self._get_session()
    url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents"
    headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.get(url, headers=headers) as response:
        if response.status != 200:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to list documents ({response.status}): {raw}"
            )

        data = await response.json()

    # The API returns either a list or a dict with a 'documents' key.
    if isinstance(data, list):
        raw_documents = data
    elif isinstance(data, dict):
        # `or []` also covers an explicit null under 'documents'.
        raw_documents = data.get('documents') or []
    else:
        raw_documents = []

    # Normalize nested structure: file_metadata -> top-level keys.
    normalized = []
    for doc in raw_documents:
        # dict.get(key, {}) returns None when the key is present with a null
        # value, so fall back with `or` to avoid calling .get on None.
        file_meta = doc.get('file_metadata') or {}
        size_raw = file_meta.get('size_bytes')
        normalized.append({
            'file_id': file_meta.get('file_id'),
            'filename': file_meta.get('name'),
            'blake3_hash': file_meta.get('hash'),
            # size_bytes is passed through int(); missing/empty/0 becomes 0.
            'size_bytes': int(size_raw) if size_raw else 0,
            'content_type': file_meta.get('content_type'),
            'created_at': file_meta.get('created_at'),
            'fields': doc.get('fields') or {},
            'status': doc.get('status')
        })

    self._log(f"✅ Listed {len(normalized)} documents")
    return normalized
|
||
|
||
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
    """Fetch one document of a collection in a flattened, normalized form.

    GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: Collection the document belongs to.
        file_id: xAI file id of the document.

    Returns:
        None when the API answers 404, otherwise a dict with the keys:
            'file_id':      file_metadata.file_id
            'filename':     file_metadata.name
            'blake3_hash':  file_metadata.hash (per the original note a
                            plain hex string without prefix - not verified
                            here)
            'size_bytes':   file_metadata.size_bytes as int, 0 when missing
            'content_type': file_metadata.content_type
            'created_at':   file_metadata.created_at
            'fields':       custom metadata dict ({} when missing)
            'status':       document status as reported by the API

    Raises:
        RuntimeError: On any status other than 200 or 404.
    """
    self._log(f"📄 Getting document {file_id} from collection {collection_id}")

    session = await self._get_session()
    url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    headers = {"Authorization": f"Bearer {self.management_key}"}

    async with session.get(url, headers=headers) as response:
        # 404 is an expected outcome and reported as None, not as an error.
        if response.status == 404:
            return None

        if response.status != 200:
            raw = await response.text()
            raise RuntimeError(
                f"Failed to get document from collection ({response.status}): {raw}"
            )

        data = await response.json()

    # Normalize nested structure: file_metadata -> top-level keys.
    # dict.get(key, {}) returns None when the key is present with a null
    # value, so fall back with `or` to avoid calling .get on None.
    file_meta = data.get('file_metadata') or {}
    size_raw = file_meta.get('size_bytes')
    normalized = {
        'file_id': file_meta.get('file_id'),
        'filename': file_meta.get('name'),
        'blake3_hash': file_meta.get('hash'),
        # size_bytes is passed through int(); missing/empty/0 becomes 0.
        'size_bytes': int(size_raw) if size_raw else 0,
        'content_type': file_meta.get('content_type'),
        'created_at': file_meta.get('created_at'),
        'fields': data.get('fields') or {},
        'status': data.get('status')
    }

    self._log(f"✅ Document info retrieved: {normalized.get('filename', 'N/A')}")
    return normalized
|
||
|
||
async def update_document_metadata(
    self,
    collection_id: str,
    file_id: str,
    metadata: Dict[str, str]
) -> None:
    """Update the metadata fields of a document without uploading a file.

    PATCH https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}

    Args:
        collection_id: xAI collection id.
        file_id: xAI file id of the document.
        metadata: Metadata to send; it is wrapped as ``{"fields": metadata}``.

    Raises:
        RuntimeError: When the response status is neither 200 nor 204.
    """
    self._log(f"📝 Updating metadata for document {file_id}")

    http = await self._get_session()
    endpoint = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
    request_headers = {
        "Authorization": f"Bearer {self.management_key}",
        "Content-Type": "application/json"
    }
    payload = {"fields": metadata}

    async with http.patch(endpoint, json=payload, headers=request_headers) as resp:
        accepted = resp.status in (200, 204)
        if not accepted:
            body_text = await resp.text()
            raise RuntimeError(
                f"Failed to update document metadata ({resp.status}): {body_text}"
            )

    self._log(f"✅ Metadata updated for {file_id}")
|
||
|
||
def is_mime_type_supported(self, mime_type: str) -> bool:
    """Check whether a MIME type is on this service's upload allow-list.

    The comparison is case-insensitive, ignores surrounding whitespace and
    ignores any parameters after ';' (so 'text/html; charset=utf-8' is
    treated as 'text/html').

    Args:
        mime_type: MIME type string, optionally with parameters.

    Returns:
        True when the type is on the allow-list, False otherwise (including
        for an empty or None value).
    """
    # Allow-list as maintained in this file. The original comment attributes
    # it to the xAI documentation - not verified here.
    supported_types = frozenset({
        # Documents
        'application/pdf',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.ms-excel',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.oasis.opendocument.text',
        'application/epub+zip',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',

        # Text
        'text/plain',
        'text/html',
        'text/markdown',
        'text/csv',
        'text/xml',

        # Code
        'text/javascript',
        'application/json',
        'application/xml',
        'text/x-python',
        'text/x-java-source',
        'text/x-c',
        'text/x-c++src',

        # Other
        'application/zip',
    })

    if not mime_type:
        return False

    # Only the "type/subtype" part identifies the media type; parameters
    # such as "; charset=utf-8" must not affect the lookup.
    essence = mime_type.partition(';')[0]
    return essence.strip().lower() in supported_types
|