Compare commits
2 Commits
e727582584
...
bb13d59ddb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bb13d59ddb | ||
|
|
b0fceef4e2 |
@@ -282,7 +282,8 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
successful = 0
|
successful = 0
|
||||||
failed = 0
|
failed = 0
|
||||||
skipped = 0
|
skipped = 0
|
||||||
|
# Track aiDocumentIds for orphan detection (collected during sync)
|
||||||
|
synced_file_ids: set = set()
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
doc_id = doc['documentId']
|
doc_id = doc['documentId']
|
||||||
doc_name = doc.get('documentName', 'Unknown')
|
doc_name = doc.get('documentName', 'Unknown')
|
||||||
@@ -303,7 +304,17 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
if junction_status in ['new', 'unclean', 'failed']:
|
if junction_status in ['new', 'unclean', 'failed']:
|
||||||
needs_sync = True
|
needs_sync = True
|
||||||
reason = f"status={junction_status}"
|
reason = f"status={junction_status}"
|
||||||
elif junction_status == 'synced' and blake3_hash and ai_document_id:
|
elif junction_status == 'synced':
|
||||||
|
# Synced status should have both blake3_hash and ai_document_id
|
||||||
|
if not blake3_hash:
|
||||||
|
needs_sync = True
|
||||||
|
reason = "inconsistency: synced but no blake3 hash"
|
||||||
|
ctx.logger.warn(f" ⚠️ Synced document missing blake3 hash!")
|
||||||
|
elif not ai_document_id:
|
||||||
|
needs_sync = True
|
||||||
|
reason = "inconsistency: synced but no aiDocumentId"
|
||||||
|
ctx.logger.warn(f" ⚠️ Synced document missing aiDocumentId!")
|
||||||
|
else:
|
||||||
# Verify Blake3 hash with XAI (always, since hash from JunctionData API is free)
|
# Verify Blake3 hash with XAI (always, since hash from JunctionData API is free)
|
||||||
try:
|
try:
|
||||||
xai_doc_info = await xai.get_collection_document(collection_id, ai_document_id)
|
xai_doc_info = await xai.get_collection_document(collection_id, ai_document_id)
|
||||||
@@ -319,11 +330,17 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
else:
|
else:
|
||||||
needs_sync = True
|
needs_sync = True
|
||||||
reason = "file not found in XAI collection"
|
reason = "file not found in XAI collection"
|
||||||
|
ctx.logger.warn(f" ⚠️ Document marked synced but not in XAI!")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.warn(f" ⚠️ Failed to verify Blake3: {e}")
|
needs_sync = True
|
||||||
|
reason = f"verification failed: {e}"
|
||||||
|
ctx.logger.warn(f" ⚠️ Failed to verify Blake3, will re-sync: {e}")
|
||||||
|
|
||||||
if not needs_sync:
|
if not needs_sync:
|
||||||
ctx.logger.info(f" ⏭️ Skipped (no sync needed)")
|
ctx.logger.info(f" ⏭️ Skipped (no sync needed)")
|
||||||
|
# Document is already synced, track its aiDocumentId
|
||||||
|
if ai_document_id:
|
||||||
|
synced_file_ids.add(ai_document_id)
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -338,24 +355,27 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
failed += 1
|
failed += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get attachment details for MIME type
|
# Get attachment details for MIME type and original filename
|
||||||
try:
|
try:
|
||||||
attachment = await espocrm.get_entity('Attachment', attachment_id)
|
attachment = await espocrm.get_entity('Attachment', attachment_id)
|
||||||
mime_type = attachment.get('type', 'application/octet-stream')
|
mime_type = attachment.get('type', 'application/octet-stream')
|
||||||
file_size = attachment.get('size', 0)
|
file_size = attachment.get('size', 0)
|
||||||
|
original_filename = attachment.get('name', doc_name) # Original filename with extension
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ctx.logger.warn(f" ⚠️ Failed to get attachment details: {e}, using defaults")
|
ctx.logger.warn(f" ⚠️ Failed to get attachment details: {e}, using defaults")
|
||||||
mime_type = 'application/octet-stream'
|
mime_type = 'application/octet-stream'
|
||||||
file_size = 0
|
file_size = 0
|
||||||
|
original_filename = doc_name
|
||||||
|
|
||||||
ctx.logger.info(f" 📎 Attachment: {attachment_id} ({mime_type}, {file_size} bytes)")
|
ctx.logger.info(f" 📎 Attachment: {attachment_id} ({mime_type}, {file_size} bytes)")
|
||||||
|
ctx.logger.info(f" 📄 Original filename: {original_filename}")
|
||||||
|
|
||||||
# Download document
|
# Download document
|
||||||
file_content = await espocrm.download_attachment(attachment_id)
|
file_content = await espocrm.download_attachment(attachment_id)
|
||||||
ctx.logger.info(f" 📥 Downloaded {len(file_content)} bytes")
|
ctx.logger.info(f" 📥 Downloaded {len(file_content)} bytes")
|
||||||
|
|
||||||
# Upload to XAI
|
# Upload to XAI with original filename (includes extension)
|
||||||
filename = doc_name
|
filename = original_filename
|
||||||
|
|
||||||
xai_file_id = await xai.upload_file(file_content, filename, mime_type)
|
xai_file_id = await xai.upload_file(file_content, filename, mime_type)
|
||||||
ctx.logger.info(f" 📤 Uploaded to XAI: {xai_file_id}")
|
ctx.logger.info(f" 📤 Uploaded to XAI: {xai_file_id}")
|
||||||
@@ -376,6 +396,9 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
)
|
)
|
||||||
ctx.logger.info(f" ✅ Junction updated")
|
ctx.logger.info(f" ✅ Junction updated")
|
||||||
|
|
||||||
|
# Track the new aiDocumentId for orphan detection
|
||||||
|
synced_file_ids.add(xai_file_id)
|
||||||
|
|
||||||
successful += 1
|
successful += 1
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -403,11 +426,12 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
xai_documents = await xai.list_collection_documents(collection_id)
|
xai_documents = await xai.list_collection_documents(collection_id)
|
||||||
xai_file_ids = {doc.get('file_id') for doc in xai_documents if doc.get('file_id')}
|
xai_file_ids = {doc.get('file_id') for doc in xai_documents if doc.get('file_id')}
|
||||||
|
|
||||||
# Get all ai_document_ids from junction
|
# Use synced_file_ids (collected during this sync) for orphan detection
|
||||||
junction_file_ids = {doc.get('aiDocumentId') for doc in documents if doc.get('aiDocumentId')}
|
# This includes both pre-existing synced docs and newly uploaded ones
|
||||||
|
ctx.logger.info(f" XAI has {len(xai_file_ids)} files, we have {len(synced_file_ids)} synced")
|
||||||
|
|
||||||
# Find orphans (in XAI but not in junction)
|
# Find orphans (in XAI but not in our current sync)
|
||||||
orphans = xai_file_ids - junction_file_ids
|
orphans = xai_file_ids - synced_file_ids
|
||||||
|
|
||||||
if orphans:
|
if orphans:
|
||||||
ctx.logger.info(f" Found {len(orphans)} orphaned file(s)")
|
ctx.logger.info(f" Found {len(orphans)} orphaned file(s)")
|
||||||
@@ -432,7 +456,7 @@ class AIKnowledgeSync(BaseSyncUtils):
|
|||||||
ctx.logger.info(f" ✅ Synced: {successful}")
|
ctx.logger.info(f" ✅ Synced: {successful}")
|
||||||
ctx.logger.info(f" ⏭️ Skipped: {skipped}")
|
ctx.logger.info(f" ⏭️ Skipped: {skipped}")
|
||||||
ctx.logger.info(f" ❌ Failed: {failed}")
|
ctx.logger.info(f" ❌ Failed: {failed}")
|
||||||
ctx.logger.info(f" Mode: {'FULL SYNC (Blake3 verification)' if full_sync else 'INCREMENTAL'}")
|
ctx.logger.info(f" Mode: Blake3 hash verification enabled")
|
||||||
ctx.logger.info("=" * 80)
|
ctx.logger.info("=" * 80)
|
||||||
|
|
||||||
def _calculate_metadata_hash(self, document: Dict) -> str:
|
def _calculate_metadata_hash(self, document: Dict) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user