diff --git a/src/steps/crm/akte/akte_sync_event_step.py b/src/steps/crm/akte/akte_sync_event_step.py index 467a99b..e9f55ae 100644 --- a/src/steps/crm/akte/akte_sync_event_step.py +++ b/src/steps/crm/akte/akte_sync_event_step.py @@ -674,6 +674,63 @@ async def _run_ragflow_sync( file_content = await espocrm.download_attachment(attachment_id) ctx.logger.info(f" Downloaded {len(file_content)} bytes") + # ── EML → TXT Konvertierung ─────────────────────────────── + if filename.lower().endswith('.eml'): + try: + import email as _email + from bs4 import BeautifulSoup + msg = _email.message_from_bytes(file_content) + subject = msg.get('Subject', '') + from_ = msg.get('From', '') + date = msg.get('Date', '') + plain_parts, html_parts = [], [] + if msg.is_multipart(): + for part in msg.walk(): + ct = part.get_content_type() + if ct == 'text/plain': + plain_parts.append(part.get_payload(decode=True).decode( + part.get_content_charset() or 'utf-8', errors='replace')) + elif ct == 'text/html': + html_parts.append(part.get_payload(decode=True).decode( + part.get_content_charset() or 'utf-8', errors='replace')) + else: + ct = msg.get_content_type() + payload = msg.get_payload(decode=True).decode( + msg.get_content_charset() or 'utf-8', errors='replace') + if ct == 'text/html': + html_parts.append(payload) + else: + plain_parts.append(payload) + if plain_parts: + body = '\n\n'.join(plain_parts) + elif html_parts: + soup = BeautifulSoup('\n'.join(html_parts), 'html.parser') + for tag in soup(['script', 'style', 'header', 'footer', 'nav']): + tag.decompose() + body = '\n'.join( + line.strip() + for line in soup.get_text(separator='\n').splitlines() + if line.strip() + ) + else: + body = '' + header = ( + f"Betreff: {subject}\n" + f"Von: {from_}\n" + f"Datum: {date}\n" + f"{'-' * 80}\n\n" + ) + converted_text = (header + body).strip() + file_content = converted_text.encode('utf-8') + filename = filename[:-4] + '.txt' + mime_type = 'text/plain' + ctx.logger.info( + f" 📧 EML→TXT konvertiert: {len(file_content)} bytes " + f"(blake3 des Original-EML bleibt erhalten)" + ) + except Exception as eml_err: + ctx.logger.warn(f" ⚠️ EML-Konvertierung fehlgeschlagen, lade roh hoch: {eml_err}") + ctx.logger.info(f" 📤 Uploading '{filename}' ({mime_type})…") result = await ragflow.upload_document( dataset_id=dataset_id,