feat: Enhance document synchronization by integrating CAIKnowledge handling and improving error logging

This commit is contained in:
bsiggel
2026-03-12 22:30:11 +00:00
parent 8ed7cca432
commit 6bf2343a12
6 changed files with 492 additions and 362 deletions

View File

@@ -58,6 +58,10 @@ class EspoCRMAPI:
self._entity_defs_cache: Dict[str, Dict[str, Any]] = {}
self._entity_defs_cache_ttl_seconds = int(os.getenv('ESPOCRM_METADATA_TTL_SECONDS', '300'))
# Metadata cache (complete metadata loaded once)
self._metadata_cache: Optional[Dict[str, Any]] = None
self._metadata_cache_ts: float = 0
# Optional Redis for caching/rate limiting (centralized)
self.redis_client = get_redis_client(strict=False)
if self.redis_client:
@@ -87,20 +91,76 @@ class EspoCRMAPI:
if self._session and not self._session.closed:
await self._session.close()
async def get_entity_def(self, entity_type: str) -> Dict[str, Any]:
async def get_metadata(self) -> Dict[str, Any]:
"""
Get complete EspoCRM metadata (cached).
Loads once and caches for TTL duration.
Much faster than individual entity def calls.
Returns:
Complete metadata dict with entityDefs, clientDefs, etc.
"""
now = time.monotonic()
cached = self._entity_defs_cache.get(entity_type)
if cached and (now - cached['ts']) < self._entity_defs_cache_ttl_seconds:
return cached['data']
# Return cached if still valid
if (self._metadata_cache is not None and
(now - self._metadata_cache_ts) < self._entity_defs_cache_ttl_seconds):
return self._metadata_cache
# Load fresh metadata
try:
data = await self.api_call(f"/Metadata/EntityDefs/{entity_type}", method='GET')
except EspoCRMAPIError:
all_defs = await self.api_call("/Metadata/EntityDefs", method='GET')
data = all_defs.get(entity_type, {}) if isinstance(all_defs, dict) else {}
self._log("📥 Loading complete EspoCRM metadata...", level='debug')
metadata = await self.api_call("/Metadata", method='GET')
if not isinstance(metadata, dict):
self._log("⚠️ Metadata response is not a dict, using empty", level='warn')
metadata = {}
# Cache it
self._metadata_cache = metadata
self._metadata_cache_ts = now
entity_count = len(metadata.get('entityDefs', {}))
self._log(f"✅ Metadata cached: {entity_count} entity definitions", level='debug')
return metadata
except Exception as e:
self._log(f"❌ Failed to load metadata: {e}", level='error')
# Return empty dict as fallback
return {}
self._entity_defs_cache[entity_type] = {'ts': now, 'data': data}
return data
async def get_entity_def(self, entity_type: str) -> Dict[str, Any]:
"""
Get entity definition for a specific entity type (cached via metadata).
Uses complete metadata cache - much faster and correct API usage.
Args:
entity_type: Entity type (e.g., 'Document', 'CDokumente', 'Account')
Returns:
Entity definition dict with fields, links, etc.
"""
try:
metadata = await self.get_metadata()
entity_defs = metadata.get('entityDefs', {})
if not isinstance(entity_defs, dict):
self._log(f"⚠️ entityDefs is not a dict for {entity_type}", level='warn')
return {}
entity_def = entity_defs.get(entity_type, {})
if not entity_def:
self._log(f"⚠️ No entity definition found for '{entity_type}'", level='debug')
return entity_def
except Exception as e:
self._log(f"⚠️ Could not load entity def for {entity_type}: {e}", level='warn')
return {}
async def api_call(
self,
@@ -540,3 +600,131 @@ class EspoCRMAPI:
)
"""
await self.update_entity(junction_entity, junction_id, fields)
async def get_knowledge_documents_with_junction(
self,
knowledge_id: str
) -> List[Dict[str, Any]]:
"""
Get all documents linked to a CAIKnowledge entry with junction data.
Uses custom EspoCRM endpoint: GET /JunctionData/CAIKnowledge/{knowledge_id}/dokumentes
Returns enriched list with:
- junctionId: Junction table ID
- cAIKnowledgeId, cDokumenteId: Junction keys
- aiDocumentId: XAI document ID from junction
- syncstatus: Sync status from junction (new, synced, failed, unclean)
- lastSync: Last sync timestamp from junction
- documentId, documentName: Document info
- blake3hash: Blake3 hash from document entity
- documentCreatedAt, documentModifiedAt: Document timestamps
This consolidates multiple API calls into one efficient query.
Args:
knowledge_id: CAIKnowledge entity ID
Returns:
List of document dicts with junction data
Example:
docs = await espocrm.get_knowledge_documents_with_junction('69b1b03582bb6e2da')
for doc in docs:
print(f"{doc['documentName']}: {doc['syncstatus']}")
"""
# JunctionData endpoint is at root level, not under /api/v1
base_url = self.api_base_url.rstrip('/').replace('/api/v1', '')
url = f"{base_url}/JunctionData/CAIKnowledge/{knowledge_id}/dokumentes"
self._log(f"GET {url}")
try:
session = await self._get_session()
timeout = aiohttp.ClientTimeout(total=self.api_timeout_seconds)
async with session.get(url, headers=self._get_headers(), timeout=timeout) as response:
self._log(f"Response status: {response.status}")
if response.status == 404:
# Knowledge base not found or no documents linked
return []
if response.status >= 400:
error_text = await response.text()
raise EspoCRMAPIError(f"JunctionData GET failed: {response.status} - {error_text}")
result = await response.json()
documents = result.get('list', [])
self._log(f"✅ Loaded {len(documents)} document(s) with junction data")
return documents
except asyncio.TimeoutError:
raise EspoCRMTimeoutError(f"Timeout getting junction data for knowledge {knowledge_id}")
except aiohttp.ClientError as e:
raise EspoCRMAPIError(f"Network error getting junction data: {e}")
async def update_knowledge_document_junction(
self,
knowledge_id: str,
document_id: str,
fields: Dict[str, Any],
update_last_sync: bool = True
) -> Dict[str, Any]:
"""
Update junction columns for a specific document link.
Uses custom EspoCRM endpoint:
PUT /JunctionData/CAIKnowledge/{knowledge_id}/dokumentes/{document_id}
Args:
knowledge_id: CAIKnowledge entity ID
document_id: CDokumente entity ID
fields: Junction fields to update (aiDocumentId, syncstatus, etc.)
update_last_sync: Whether to update lastSync timestamp (default: True)
Returns:
Updated junction data
Example:
await espocrm.update_knowledge_document_junction(
'69b1b03582bb6e2da',
'69a68b556a39771bf',
{
'aiDocumentId': 'xai-file-abc123',
'syncstatus': 'synced'
},
update_last_sync=True
)
"""
# JunctionData endpoint is at root level, not under /api/v1
base_url = self.api_base_url.rstrip('/').replace('/api/v1', '')
url = f"{base_url}/JunctionData/CAIKnowledge/{knowledge_id}/dokumentes/{document_id}"
payload = {**fields}
if update_last_sync:
payload['updateLastSync'] = True
self._log(f"PUT {url}")
self._log(f" Payload: {payload}")
try:
session = await self._get_session()
timeout = aiohttp.ClientTimeout(total=self.api_timeout_seconds)
async with session.put(url, headers=self._get_headers(), json=payload, timeout=timeout) as response:
self._log(f"Response status: {response.status}")
if response.status >= 400:
error_text = await response.text()
raise EspoCRMAPIError(f"JunctionData PUT failed: {response.status} - {error_text}")
result = await response.json()
self._log(f"✅ Junction updated: junctionId={result.get('junctionId')}")
return result
except asyncio.TimeoutError:
raise EspoCRMTimeoutError(f"Timeout updating junction data")
except aiohttp.ClientError as e:
raise EspoCRMAPIError(f"Network error updating junction data: {e}")