fix: Normalize MIME type for PDF uploads, send purpose="file_search" on upload, and switch add_to_collection from the management API to the OpenAI-compatible vector store API

This commit is contained in:
bsiggel
2026-03-15 16:34:13 +00:00
parent eaab14ae57
commit 59fdd7d9ec

View File

@@ -63,7 +63,13 @@ class XAIService:
Raises:
RuntimeError: bei HTTP-Fehler oder fehlendem file_id in der Antwort
"""
self._log(f"📤 Uploading {len(file_content)} bytes to xAI: {filename}")
# Normalize MIME type: xAI needs correct Content-Type for proper processing
# If generic octet-stream but file is clearly a PDF, fix it
if mime_type == 'application/octet-stream' and filename.lower().endswith('.pdf'):
mime_type = 'application/pdf'
self._log(f"⚠️ Corrected MIME type to application/pdf for {filename}")
self._log(f"📤 Uploading {len(file_content)} bytes to xAI: {filename} ({mime_type})")
session = await self._get_session()
url = f"{XAI_FILES_URL}/v1/files"
@@ -79,6 +85,9 @@ class XAIService:
filename=filename,
content_type=mime_type
)
# CRITICAL: purpose="file_search" enables proper PDF processing
# Without this, xAI throws "internal error" on complex PDFs
form.add_field('purpose', 'file_search')
async with session.post(url, data=form, headers=headers) as response:
try:
@@ -103,9 +112,12 @@ class XAIService:
async def add_to_collection(self, collection_id: str, file_id: str) -> None:
"""
Fügt eine Datei einer xAI-Collection hinzu.
Fügt eine Datei einer xAI-Collection (Vector Store) hinzu.
POST https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
POST https://api.x.ai/v1/vector_stores/{vector_store_id}/files
Uses the OpenAI-compatible API pattern for adding files to vector stores.
This triggers proper indexing and processing.
Raises:
RuntimeError: bei HTTP-Fehler
@@ -113,13 +125,16 @@ class XAIService:
self._log(f"📚 Adding file {file_id} to collection {collection_id}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
# Use the OpenAI-compatible endpoint (not management API)
url = f"{XAI_FILES_URL}/v1/vector_stores/{collection_id}/files"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
async with session.post(url, headers=headers) as response:
payload = {"file_id": file_id}
async with session.post(url, json=payload, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
raise RuntimeError(