feat(sync): Update RAGFlow dataset creation to use stable EspoCRM-ID and improve logging
This commit is contained in:
@@ -7,9 +7,31 @@ from services.logging_utils import get_service_logger
|
|||||||
|
|
||||||
RAGFLOW_DEFAULT_BASE_URL = "http://192.168.1.64:9380"
|
RAGFLOW_DEFAULT_BASE_URL = "http://192.168.1.64:9380"
|
||||||
|
|
||||||
# Defaults fuer Dokument-Analyse
|
# Knowledge-Graph Dataset Konfiguration
|
||||||
RAGFLOW_AUTO_KEYWORDS = 14
|
# Hinweis: llm_id kann nur über die RAGFlow Web-UI gesetzt werden (API erlaubt es nicht)
|
||||||
RAGFLOW_AUTO_QUESTIONS = 7
|
RAGFLOW_KG_ENTITY_TYPES = [
|
||||||
|
'Partei',
|
||||||
|
'Anspruch',
|
||||||
|
'Anspruchsgrundlage',
|
||||||
|
'unstreitiger Sachverhalt',
|
||||||
|
'streitiger Sachverhalt',
|
||||||
|
'streitige Rechtsfrage',
|
||||||
|
'Beweismittel',
|
||||||
|
'Beweisangebot',
|
||||||
|
'Norm',
|
||||||
|
'Gerichtsentscheidung',
|
||||||
|
'Forderung',
|
||||||
|
'Beweisergebnis',
|
||||||
|
]
|
||||||
|
RAGFLOW_KG_PARSER_CONFIG = {
|
||||||
|
'raptor': {'use_raptor': False},
|
||||||
|
'graphrag': {
|
||||||
|
'use_graphrag': True,
|
||||||
|
'method': 'general',
|
||||||
|
'resolution': True,
|
||||||
|
'entity_types': RAGFLOW_KG_ENTITY_TYPES,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _base_to_dict(obj: Any) -> Any:
|
def _base_to_dict(obj: Any) -> Any:
|
||||||
@@ -101,22 +123,23 @@ class RAGFlowService:
|
|||||||
chunk_method: str = 'laws',
|
chunk_method: str = 'laws',
|
||||||
embedding_model: Optional[str] = None,
|
embedding_model: Optional[str] = None,
|
||||||
description: Optional[str] = None,
|
description: Optional[str] = None,
|
||||||
auto_keywords: int = RAGFLOW_AUTO_KEYWORDS,
|
|
||||||
auto_questions: int = RAGFLOW_AUTO_QUESTIONS,
|
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
Erstellt ein neues RAGFlow Dataset (entspricht xAI Collection).
|
Erstellt ein neues RAGFlow Dataset mit Knowledge-Graph Konfiguration.
|
||||||
|
|
||||||
Verwendet standardmaessig chunk_method='laws' (optimiert fuer Rechtsdokumente).
|
Ablauf:
|
||||||
Setzt nach der Erstellung auto_keywords=14 und auto_questions=7.
|
1. create_dataset(chunk_method='laws') via SDK
|
||||||
|
2. dataset.update(parser_config={graphrag, raptor}) via SDK
|
||||||
|
(graphrag: use_graphrag=True, method=general, resolution=True,
|
||||||
|
entity_types=deutsche Rechtsbegriffe, raptor=False)
|
||||||
|
|
||||||
|
Hinweis: llm_id fuer die KG-Extraktion muss in der RAGFlow Web-UI
|
||||||
|
gesetzt werden – die API erlaubt es nicht.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict mit 'id', 'name', 'chunk_method', 'parser_config', etc.
|
dict mit 'id', 'name', 'chunk_method', 'parser_config', etc.
|
||||||
"""
|
"""
|
||||||
self._log(
|
self._log(f"📚 Creating dataset: {name} (chunk_method={chunk_method}, graphrag=True)")
|
||||||
f"📚 Creating dataset: {name} "
|
|
||||||
f"(chunk_method={chunk_method}, keywords={auto_keywords}, questions={auto_questions})"
|
|
||||||
)
|
|
||||||
|
|
||||||
def _create():
|
def _create():
|
||||||
rag = self._get_client()
|
rag = self._get_client()
|
||||||
@@ -126,14 +149,9 @@ class RAGFlowService:
|
|||||||
if description:
|
if description:
|
||||||
kwargs['description'] = description
|
kwargs['description'] = description
|
||||||
dataset = rag.create_dataset(**kwargs)
|
dataset = rag.create_dataset(**kwargs)
|
||||||
|
# graphrag + raptor werden via update() gesetzt
|
||||||
# parser_config kann erst nach create via update() gesetzt werden
|
# llm_id kann nur über die RAGFlow Web-UI konfiguriert werden
|
||||||
dataset.update({
|
dataset.update({'parser_config': RAGFLOW_KG_PARSER_CONFIG})
|
||||||
'parser_config': {
|
|
||||||
'auto_keywords': auto_keywords,
|
|
||||||
'auto_questions': auto_questions,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
return self._dataset_to_dict(dataset)
|
return self._dataset_to_dict(dataset)
|
||||||
|
|
||||||
result = await self._run(_create)
|
result = await self._run(_create)
|
||||||
@@ -164,8 +182,6 @@ class RAGFlowService:
|
|||||||
chunk_method: str = 'laws',
|
chunk_method: str = 'laws',
|
||||||
embedding_model: Optional[str] = None,
|
embedding_model: Optional[str] = None,
|
||||||
description: Optional[str] = None,
|
description: Optional[str] = None,
|
||||||
auto_keywords: int = RAGFLOW_AUTO_KEYWORDS,
|
|
||||||
auto_questions: int = RAGFLOW_AUTO_QUESTIONS,
|
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
Gibt bestehendes Dataset zurueck oder erstellt ein neues (get-or-create).
|
Gibt bestehendes Dataset zurueck oder erstellt ein neues (get-or-create).
|
||||||
@@ -183,8 +199,6 @@ class RAGFlowService:
|
|||||||
chunk_method=chunk_method,
|
chunk_method=chunk_method,
|
||||||
embedding_model=embedding_model,
|
embedding_model=embedding_model,
|
||||||
description=description,
|
description=description,
|
||||||
auto_keywords=auto_keywords,
|
|
||||||
auto_questions=auto_questions,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
async def delete_dataset(self, dataset_id: str) -> None:
|
async def delete_dataset(self, dataset_id: str) -> None:
|
||||||
|
|||||||
@@ -531,8 +531,10 @@ async def _run_ragflow_sync(
|
|||||||
if not dataset_id:
|
if not dataset_id:
|
||||||
if ai_aktivierungsstatus == 'new':
|
if ai_aktivierungsstatus == 'new':
|
||||||
akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
|
akte_name = akte.get('name') or f"Akte {akte.get('aktennummer', akte_id)}"
|
||||||
ctx.logger.info(f" Status 'new' → Erstelle neues RAGflow Dataset für '{akte_name}'...")
|
# Name = EspoCRM-ID (stabil, eindeutig, kein Sonderzeichen-Problem)
|
||||||
dataset_info = await ragflow.ensure_dataset(akte_name)
|
dataset_name = akte_id
|
||||||
|
ctx.logger.info(f" Status 'new' → Erstelle neues RAGflow Dataset '{dataset_name}' für '{akte_name}'...")
|
||||||
|
dataset_info = await ragflow.ensure_dataset(dataset_name)
|
||||||
if not dataset_info or not dataset_info.get('id'):
|
if not dataset_info or not dataset_info.get('id'):
|
||||||
ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden – Sync abgebrochen")
|
ctx.logger.error("❌ RAGflow Dataset konnte nicht erstellt werden – Sync abgebrochen")
|
||||||
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
|
await espocrm.update_entity('CAkten', akte_id, {'aiSyncStatus': 'failed'})
|
||||||
|
|||||||
Reference in New Issue
Block a user