feat(sync): Implement orphan cleanup for xAI documents without EspoCRM equivalents

This commit is contained in:
bsiggel
2026-03-26 14:20:33 +00:00
parent 1e202a6233
commit c9bdd021e4
3 changed files with 142 additions and 52 deletions

View File

@@ -85,9 +85,7 @@ class XAIService:
filename=filename,
content_type=mime_type
)
# CRITICAL: purpose="file_search" enables proper PDF processing
# Without this, xAI throws "internal error" on complex PDFs
form.add_field('purpose', 'file_search')
form.add_field('purpose', 'assistants')
async with session.post(url, data=form, headers=headers) as response:
try:
@@ -134,6 +132,85 @@ class XAIService:
self._log(f"✅ File {file_id} added to collection {collection_id}")
async def upload_to_collection(
    self,
    collection_id: str,
    file_content: bytes,
    filename: str,
    mime_type: str = 'application/octet-stream',
    fields: Optional[Dict[str, str]] = None,
) -> str:
    """
    Upload a file directly into an xAI collection (one request, incl. metadata).

    POST https://management-api.x.ai/v1/collections/{collection_id}/documents
    Content-Type: multipart/form-data

    Args:
        collection_id: Target collection
        file_content: File content as bytes
        filename: File name (including extension)
        mime_type: MIME type
        fields: Custom metadata fields (correspond to the field_definitions)

    Returns:
        xAI file_id (str)

    Raises:
        RuntimeError: on an HTTP error status or when the response
            contains no file_id
    """
    import json as _json

    # A generic MIME type on a *.pdf name is replaced by application/pdf.
    if mime_type == 'application/octet-stream' and filename.lower().endswith('.pdf'):
        mime_type = 'application/pdf'

    self._log(
        f"📤 Uploading {len(file_content)} bytes to collection {collection_id}: "
        f"{filename} ({mime_type})"
    )

    session = await self._get_session()
    url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents"
    headers = {"Authorization": f"Bearer {self.management_key}"}

    form = aiohttp.FormData(quote_fields=False)
    form.add_field('name', filename)
    form.add_field(
        'data',
        file_content,
        filename=filename,
        content_type=mime_type,
    )
    form.add_field('content_type', mime_type)
    if fields:
        # Metadata is sent as one JSON-encoded form field.
        form.add_field('fields', _json.dumps(fields))

    async with session.post(url, data=form, headers=headers) as response:
        try:
            data = await response.json()
        except Exception:
            # Body could not be parsed as JSON: keep the raw text so it
            # still shows up in the error messages below.
            raw = await response.text()
            data = {"_raw": raw}

        if response.status not in (200, 201):
            raise RuntimeError(
                f"upload_to_collection failed ({response.status}): {data}"
            )

        # A JSON body that is not an object (list, string, null) has no
        # .get(); treat it as "no file_id" so the documented RuntimeError
        # is raised instead of an AttributeError.
        payload = data if isinstance(data, dict) else {}
        file_metadata = payload.get('file_metadata')
        if not isinstance(file_metadata, dict):
            file_metadata = {}

        # Response may nest the file_id in different places
        file_id = (
            payload.get('file_id')
            or file_metadata.get('file_id')
            or payload.get('id')
        )
        if not file_id:
            raise RuntimeError(
                f"No file_id in upload_to_collection response: {data}"
            )

        self._log(f"✅ Uploaded to collection {collection_id}: {file_id}")
        return file_id
async def remove_from_collection(self, collection_id: str, file_id: str) -> None:
"""
Entfernt eine Datei aus einer xAI-Collection.
@@ -194,7 +271,6 @@ class XAIService:
async def create_collection(
self,
name: str,
metadata: Optional[Dict[str, str]] = None,
field_definitions: Optional[List[Dict]] = None
) -> Dict:
"""
@@ -204,7 +280,6 @@ class XAIService:
Args:
name: Collection name
metadata: Optional metadata dict
field_definitions: Optional field definitions for metadata fields
Returns:
@@ -239,10 +314,6 @@ class XAIService:
"field_definitions": field_definitions
}
# Add metadata if provided
if metadata:
body["metadata"] = metadata
async with session.post(url, json=body, headers=headers) as response:
if response.status not in (200, 201):
raw = await response.text()
@@ -435,44 +506,45 @@ class XAIService:
self._log(f"✅ Document info retrieved: {normalized.get('filename', 'N/A')}")
return normalized
async def update_document_metadata(
async def rename_file(
self,
collection_id: str,
file_id: str,
metadata: Dict[str, str]
new_filename: str,
) -> None:
"""
Updates only the metadata of a document (no file upload).
Renames a file at the Files API level (no re-upload).
PATCH https://management-api.x.ai/v1/collections/{collection_id}/documents/{file_id}
PUT https://api.x.ai/v1/files/{file_id}
According to the xAI documentation, this endpoint can change the filename and
content_type, but not custom metadata fields.
Args:
collection_id: XAI Collection ID
file_id: XAI file_id
metadata: Updated metadata fields
file_id: xAI file_id
new_filename: New filename
Raises:
RuntimeError: on HTTP error
"""
self._log(f"📝 Updating metadata for document {file_id}")
# NOTE(review): this message and the final "File renamed" message print {file_id} and {new_filename} with no separator between them; confirm whether one was intended
self._log(f"✏️ Renaming file {file_id}{new_filename}")
session = await self._get_session()
url = f"{XAI_MANAGEMENT_URL}/v1/collections/{collection_id}/documents/{file_id}"
url = f"{XAI_FILES_URL}/v1/files/{file_id}"
headers = {
"Authorization": f"Bearer {self.management_key}",
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
body = {"fields": metadata}
body = {"filename": new_filename}
async with session.patch(url, json=body, headers=headers) as response:
async with session.put(url, json=body, headers=headers) as response:
if response.status not in (200, 204):
raw = await response.text()
raise RuntimeError(
f"Failed to update document metadata ({response.status}): {raw}"
f"Failed to rename file {file_id} ({response.status}): {raw}"
)
self._log(f"Metadata updated for {file_id}")
self._log(f"File renamed: {file_id}{new_filename}")
def is_mime_type_supported(self, mime_type: str) -> bool:
"""