Files
motia-iii/services/xai_service.py

515 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""xAI Files & Collections Service"""
import os
import asyncio
import aiohttp
from typing import Optional, List, Dict, Tuple
from services.logging_utils import get_service_logger
XAI_FILES_URL = "https://api.x.ai"
XAI_MANAGEMENT_URL = "https://management-api.x.ai"
class XAIService:
"""
Client für xAI Files API und Collections Management API.
Benötigte Umgebungsvariablen:
- XAI_API_KEY regulärer API-Key für File-Uploads (api.x.ai)
- XAI_MANAGEMENT_KEY Management-API-Key für Collection-Operationen (management-api.x.ai)
"""
def __init__(self, ctx=None):
self.api_key = os.getenv('XAI_API_KEY', '')
self.management_key = os.getenv('XAI_MANAGEMENT_KEY', '')
self.ctx = ctx
self.logger = get_service_logger('xai', ctx)
self._session: Optional[aiohttp.ClientSession] = None
if not self.api_key:
raise ValueError("XAI_API_KEY not configured in environment")
if not self.management_key:
raise ValueError("XAI_MANAGEMENT_KEY not configured in environment")
def _log(self, msg: str, level: str = 'info') -> None:
"""Delegate logging to service logger"""
log_func = getattr(self.logger, level, self.logger.info)
log_func(msg)
async def _get_session(self) -> aiohttp.ClientSession:
if self._session is None or self._session.closed:
self._session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=120)
)
return self._session
async def close(self) -> None:
if self._session and not self._session.closed:
await self._session.close()
async def upload_file(
self,
file_content: bytes,
filename: str,
mime_type: str = 'application/octet-stream'
) -> str:
"""
Lädt eine Datei zur xAI Files API hoch (multipart/form-data).
POST https://api.x.ai/v1/files
Returns:
xAI file_id (str)
Raises:
RuntimeError: bei HTTP-Fehler oder fehlendem file_id in der Antwort
"""
self._log(f"📤 Uploading {len(file_content)} bytes to xAI: {filename}")
session = await self._get_session()
url = f"{XAI_FILES_URL}/v1/files"
headers = {"Authorization": f"Bearer {self.api_key}"}
# Create multipart form with explicit UTF-8 filename encoding
# aiohttp automatically URL-encodes filenames with special chars,
# but xAI expects raw UTF-8 in the filename parameter
form = aiohttp.FormData(quote_fields=False)
form.add_field(
'file',
file_content,
filename=filename,
content_type=mime_type
)
async with session.post(url, data=form, headers=headers) as response:
try:
data = await response.json()
except Exception:
raw = await response.text()
data = {"_raw": raw}
if response.status not in (200, 201):
raise RuntimeError(
f"xAI file upload failed ({response.status}): {data}"
)
file_id = data.get('id') or data.get('file_id')
if not file_id:
raise RuntimeError(
f"No file_id in xAI upload response: {data}"
)
self._log(f"✅ xAI file uploaded: {file_id}")
return file_id
async def add_to_collection(self, collection_id: str, file_id: str) -> None:
"""
Fügt eine Datei einer xAI-Collection hinzu.
POST https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📚 Adding file {file_id} to collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json",
}
async with session.post(url, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
raise RuntimeError(
f"Failed to add file to collection {collection_id} ({response.status}): {raw}"
)
self._log(f"✅ File {file_id} added to collection {collection_id}")
async def remove_from_collection(self, collection_id: str, file_id: str) -> None:
"""
Entfernt eine Datei aus einer xAI-Collection.
Die Datei selbst wird NICHT gelöscht sie kann in anderen Collections sein.
DELETE https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"🗑️ Removing file {file_id} from collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.delete(url, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to remove file from collection {collection_id} ({response.status}): {raw}"
)
self._log(f"✅ File {file_id} removed from collection {collection_id}")
async def add_to_collections(self, collection_ids: List[str], file_id: str) -> List[str]:
"""
Fügt eine Datei zu mehreren Collections hinzu.
Returns:
Liste der erfolgreich hinzugefügten Collection-IDs
"""
added = []
for collection_id in collection_ids:
try:
await self.add_to_collection(collection_id, file_id)
added.append(collection_id)
except Exception as e:
self._log(
f"⚠️ Fehler beim Hinzufügen zu Collection {collection_id}: {e}",
level='warn'
)
return added
async def remove_from_collections(self, collection_ids: List[str], file_id: str) -> None:
"""Entfernt eine Datei aus mehreren Collections (ignoriert Fehler pro Collection)."""
for collection_id in collection_ids:
try:
await self.remove_from_collection(collection_id, file_id)
except Exception as e:
self._log(
f"⚠️ Fehler beim Entfernen aus Collection {collection_id}: {e}",
level='warn'
)
# ========== Collection Management ==========
async def create_collection(
self,
name: str,
metadata: Optional[Dict[str, str]] = None,
field_definitions: Optional[List[Dict]] = None
) -> Dict:
"""
Erstellt eine neue xAI Collection.
POST https://management-api.x.ai/v1/collections
Args:
name: Collection name
metadata: Optional metadata dict
field_definitions: Optional field definitions for metadata fields
Returns:
Collection object mit 'id' field
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📚 Creating collection: {name}")
# Standard field definitions für document metadata
if field_definitions is None:
field_definitions = [
{"key": "document_name", "inject_into_chunk": True},
{"key": "description", "inject_into_chunk": True},
{"key": "created_at", "inject_into_chunk": False},
{"key": "modified_at", "inject_into_chunk": False},
{"key": "espocrm_id", "inject_into_chunk": False}
]
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json"
}
body = {
"collection_name": name,
"field_definitions": field_definitions
}
# Add metadata if provided
if metadata:
body["metadata"] = metadata
async with session.post(url, json=body, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
raise RuntimeError(
f"Failed to create collection ({response.status}): {raw}"
)
data = await response.json()
# API returns 'collection_id' not 'id'
collection_id = data.get('collection_id') or data.get('id')
self._log(f"✅ Collection created: {collection_id}")
return data
async def get_collection(self, collection_id: str) -> Optional[Dict]:
"""
Holt Collection-Details.
GET https://management-api.x.ai/v1/collections/{collection_id}
Returns:
Collection object or None if not found
Raises:
RuntimeError: bei HTTP-Fehler (außer 404)
"""
self._log(f"📄 Getting collection: {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status == 404:
self._log(f"⚠️ Collection not found: {collection_id}", level='warn')
return None
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to get collection ({response.status}): {raw}"
)
data = await response.json()
self._log(f"✅ Collection retrieved: {data.get('collection_name', 'N/A')}")
return data
async def delete_collection(self, collection_id: str) -> None:
"""
Löscht eine XAI Collection.
DELETE https://management-api.x.ai/v1/collections/{collection_id}
NOTE: Documents in der Collection werden NICHT gelöscht!
Sie können noch in anderen Collections sein.
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"🗑️ Deleting collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.delete(url, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to delete collection {collection_id} ({response.status}): {raw}"
)
self._log(f"✅ Collection deleted: {collection_id}")
async def list_collection_documents(self, collection_id: str) -> List[Dict]:
"""
Listet alle Dokumente in einer Collection.
GET https://management-api.x.ai/v1/collections/{collection_id}/documents
Returns:
List von normalized document objects:
[
{
'file_id': 'file_...',
'filename': 'doc.pdf',
'blake3_hash': 'hex_string', # Plain hex, kein prefix
'size_bytes': 12345,
'content_type': 'application/pdf',
'fields': {}, # Custom metadata
'status': 'DOCUMENT_STATUS_...'
}
]
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📋 Listing documents in collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to list documents ({response.status}): {raw}"
)
data = await response.json()
# API gibt Liste zurück oder dict mit 'documents' key
if isinstance(data, list):
raw_documents = data
elif isinstance(data, dict) and 'documents' in data:
raw_documents = data['documents']
else:
raw_documents = []
# Normalize nested structure: file_metadata -> top-level
normalized = []
for doc in raw_documents:
file_meta = doc.get('file_metadata', {})
normalized.append({
'file_id': file_meta.get('file_id'),
'filename': file_meta.get('name'),
'blake3_hash': file_meta.get('hash'), # Plain hex string
'size_bytes': int(file_meta.get('size_bytes', 0)) if file_meta.get('size_bytes') else 0,
'content_type': file_meta.get('content_type'),
'created_at': file_meta.get('created_at'),
'fields': doc.get('fields', {}),
'status': doc.get('status')
})
self._log(f"✅ Listed {len(normalized)} documents")
return normalized
async def get_collection_document(self, collection_id: str, file_id: str) -> Optional[Dict]:
"""
Holt Dokument-Details aus einer XAI Collection.
GET https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Returns:
Normalized dict mit document info:
{
'file_id': 'file_xyz',
'filename': 'document.pdf',
'blake3_hash': 'hex_string', # Plain hex, kein prefix
'size_bytes': 12345,
'content_type': 'application/pdf',
'fields': {...} # Custom metadata
}
Returns None if not found.
"""
self._log(f"📄 Getting document {file_id} from collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {"Authorization": f"Bearer {self.management_key}"}
async with session.get(url, headers=headers) as response:
if response.status == 404:
return None
if response.status not in (200,):
raw = await response.text()
raise RuntimeError(
f"Failed to get document from collection ({response.status}): {raw}"
)
data = await response.json()
# Normalize nested structure
file_meta = data.get('file_metadata', {})
normalized = {
'file_id': file_meta.get('file_id'),
'filename': file_meta.get('name'),
'blake3_hash': file_meta.get('hash'), # Plain hex
'size_bytes': int(file_meta.get('size_bytes', 0)) if file_meta.get('size_bytes') else 0,
'content_type': file_meta.get('content_type'),
'created_at': file_meta.get('created_at'),
'fields': data.get('fields', {}),
'status': data.get('status')
}
self._log(f"✅ Document info retrieved: {normalized.get('filename', 'N/A')}")
return normalized
async def update_document_metadata(
self,
collection_id: str,
file_id: str,
metadata: Dict[str, str]
) -> None:
"""
Aktualisiert nur Metadaten eines Documents (kein File-Upload).
PATCH https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
Args:
collection_id: XAI Collection ID
file_id: XAI file_id
metadata: Updated metadata fields
Raises:
RuntimeError: bei HTTP-Fehler
"""
self._log(f"📝 Updating metadata for document {file_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Content-Type": "application/json"
}
body = {"fields": metadata}
async with session.patch(url, json=body, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to update document metadata ({response.status}): {raw}"
)
self._log(f"✅ Metadata updated for {file_id}")
def is_mime_type_supported(self, mime_type: str) -> bool:
"""
Prüft, ob XAI diesen MIME-Type unterstützt.
Args:
mime_type: MIME type string
Returns:
True wenn unterstützt, False sonst
"""
# Liste der unterstützten MIME-Types basierend auf XAI Dokumentation
supported_types = {
# Documents
'application/pdf',
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.oasis.opendocument.text',
'application/epub+zip',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
# Text
'text/plain',
'text/html',
'text/markdown',
'text/csv',
'text/xml',
# Code
'text/javascript',
'application/json',
'application/xml',
'text/x-python',
'text/x-java-source',
'text/x-c',
'text/x-c++src',
# Other
'application/zip',
}
# Normalisiere MIME-Type (lowercase, strip whitespace)
normalized = mime_type.lower().strip()
return normalized in supported_types