feat(cron): Add RAGflow Graph Build Cron for periodic status updates and new builds

This commit is contained in:
bsiggel
2026-03-27 11:27:09 +00:00
parent a2181a25fc
commit 88c9df5995
2 changed files with 236 additions and 0 deletions

View File

@@ -426,6 +426,66 @@ class RAGFlowService:
self._log(f"📄 Document found: {result.get('name')} (run={result.get('run')})")
return result
async def trace_graphrag(self, dataset_id: str) -> Optional[Dict]:
    """
    Return the current status of the dataset's knowledge-graph build.

    Issues GET /api/v1/datasets/{dataset_id}/trace_graphrag.

    Args:
        dataset_id: ID of the dataset whose graph build is queried.

    Returns:
        Dict with the keys 'task_id', 'progress' (float), 'progress_msg',
        'begin_at' and 'update_date', or None when the response carries
        no task data (no graph build has been started yet).

    Raises:
        RuntimeError: If the server answers with an HTTP status other
            than 200 or 201.
    """
    import aiohttp

    url = f"{self.base_url.rstrip('/')}/api/v1/datasets/{dataset_id}/trace_graphrag"
    headers = {'Authorization': f'Bearer {self.api_key}'}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            if resp.status not in (200, 201):
                text = await resp.text()
                raise RuntimeError(
                    f"trace_graphrag HTTP {resp.status} fuer dataset {dataset_id}: {text}"
                )
            data = await resp.json()

    # A JSON `null` body must not crash the lookup; treat it like "no task".
    task = (data or {}).get('data')
    if not task:
        return None
    return {
        'task_id': task.get('id', ''),
        # `.get(key, default)` only covers a missing key; an explicit
        # `"progress": null` would make float() raise TypeError, so fall
        # back to 0.0 for any falsy value.
        'progress': float(task.get('progress') or 0.0),
        'progress_msg': task.get('progress_msg', ''),
        'begin_at': task.get('begin_at'),
        'update_date': task.get('update_date'),
    }
async def run_graphrag(self, dataset_id: str) -> str:
    """
    Start (or refresh) the knowledge-graph build of a dataset.

    Sends POST /api/v1/datasets/{id}/run_graphrag with an empty JSON body.

    Args:
        dataset_id: ID of the dataset whose knowledge graph is built.

    Returns:
        The 'graphrag_task_id' found in the response's 'data' object;
        an empty string when the server does not return one.

    Raises:
        RuntimeError: If the HTTP status is neither 200 nor 201.
    """
    import aiohttp

    endpoint = f"{self.base_url.rstrip('/')}/api/v1/datasets/{dataset_id}/run_graphrag"
    request_headers = {
        'Authorization': f'Bearer {self.api_key}',
        'Content-Type': 'application/json',
    }
    async with aiohttp.ClientSession() as http:
        async with http.post(endpoint, headers=request_headers, json={}) as response:
            if response.status in (200, 201):
                payload = await response.json()
            else:
                body = await response.text()
                raise RuntimeError(
                    f"run_graphrag HTTP {response.status} fuer dataset {dataset_id}: {body}"
                )

    # 'data' may be absent or null, in which case no task id is available.
    graph_task_id = (payload.get('data') or {}).get('graphrag_task_id', '')

    # Only mention the task id in the log line when the server supplied one.
    message = f"🔗 run_graphrag angestossen fuer {dataset_id[:16]}"
    if graph_task_id:
        message += f" task_id={graph_task_id}"
    self._log(message)
    return graph_task_id
async def wait_for_parsing(
self,
dataset_id: str,