Skip to content

Commit 69b4cda

Browse files
authored
Merge pull request HKUDS#2265 from danielaskdd/edit-kg-new
Refactor: Enhance KG Editing with Chunk Tracking
2 parents 11f1f36 + 6015e8b commit 69b4cda

File tree

4 files changed

+530
-122
lines changed

4 files changed

+530
-122
lines changed

‎lightrag/api/routers/graph_routes.py‎

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ async def create_entity(request: EntityCreateRequest):
299299
entity_data (dict): Entity properties including:
300300
- description (str): Textual description of the entity
301301
- entity_type (str): Category/type of the entity (e.g., PERSON, ORGANIZATION, LOCATION)
302+
- source_id (str): Related chunk_id from which the description originates
302303
- Additional custom properties as needed
303304
304305
Response Schema:
@@ -309,6 +310,7 @@ async def create_entity(request: EntityCreateRequest):
309310
"entity_name": "Tesla",
310311
"description": "Electric vehicle manufacturer",
311312
"entity_type": "ORGANIZATION",
313+
"source_id": "chunk-123<SEP>chunk-456",
312314
... (other entity properties)
313315
}
314316
}
@@ -361,10 +363,11 @@ async def create_relation(request: RelationCreateRequest):
361363
"""
362364
Create a new relationship between two entities in the knowledge graph
363365
364-
This endpoint establishes a directed relationship between two existing entities.
365-
Both the source and target entities must already exist in the knowledge graph.
366-
The system automatically generates vector embeddings for the relationship to
367-
enable semantic search and graph traversal.
366+
This endpoint establishes an undirected relationship between two existing entities.
367+
The provided source/target order is accepted for convenience, but the backend
368+
stored edge is undirected and may be returned with the entities swapped.
369+
Both entities must already exist in the knowledge graph. The system automatically
370+
generates vector embeddings for the relationship to enable semantic search and graph traversal.
368371
369372
Prerequisites:
370373
- Both source_entity and target_entity must exist in the knowledge graph
@@ -376,6 +379,7 @@ async def create_relation(request: RelationCreateRequest):
376379
relation_data (dict): Relationship properties including:
377380
- description (str): Textual description of the relationship
378381
- keywords (str): Comma-separated keywords describing the relationship type
382+
- source_id (str): Related chunk_id from which the description originates
379383
- weight (float): Relationship strength/importance (default: 1.0)
380384
- Additional custom properties as needed
381385
@@ -388,6 +392,7 @@ async def create_relation(request: RelationCreateRequest):
388392
"tgt_id": "Tesla",
389393
"description": "Elon Musk is the CEO of Tesla",
390394
"keywords": "CEO, founder",
395+
"source_id": "chunk-123<SEP>chunk-456",
391396
"weight": 1.0,
392397
... (other relationship properties)
393398
}

‎lightrag/lightrag.py‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3582,6 +3582,7 @@ async def aedit_entity(
35823582
"""Asynchronously edit entity information.
35833583
35843584
Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
3585+
Also synchronizes entity_chunks_storage and relation_chunks_storage to track chunk references.
35853586
35863587
Args:
35873588
entity_name: Name of the entity to edit
@@ -3600,6 +3601,8 @@ async def aedit_entity(
36003601
entity_name,
36013602
updated_data,
36023603
allow_rename,
3604+
self.entity_chunks,
3605+
self.relation_chunks,
36033606
)
36043607

36053608
def edit_entity(
@@ -3616,6 +3619,7 @@ async def aedit_relation(
36163619
"""Asynchronously edit relation information.
36173620
36183621
Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
3622+
Also synchronizes the relation_chunks_storage to track which chunks reference this relation.
36193623
36203624
Args:
36213625
source_entity: Name of the source entity
@@ -3634,6 +3638,7 @@ async def aedit_relation(
36343638
source_entity,
36353639
target_entity,
36363640
updated_data,
3641+
self.relation_chunks,
36373642
)
36383643

36393644
def edit_relation(

‎lightrag/utils.py‎

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,6 +2551,52 @@ def apply_source_ids_limit(
25512551
return truncated
25522552

25532553

2554+
def compute_incremental_chunk_ids(
    existing_full_chunk_ids: list[str],
    old_chunk_ids: list[str],
    new_chunk_ids: list[str],
) -> list[str]:
    """
    Compute incrementally updated chunk IDs based on changes.

    The delta between ``old_chunk_ids`` and ``new_chunk_ids`` is applied to
    ``existing_full_chunk_ids``:

    * IDs present in ``old_chunk_ids`` but absent from ``new_chunk_ids`` are
      removed from the existing list.
    * IDs present in ``new_chunk_ids`` but absent from ``old_chunk_ids`` are
      appended at the end, in the order they appear in ``new_chunk_ids``,
      unless they are already in the list.

    Surviving entries keep their original relative order. IDs that occur in
    both ``old_chunk_ids`` and ``new_chunk_ids`` are neither removed nor
    added. Appended IDs are never duplicated; entries already stored in
    ``existing_full_chunk_ids`` are passed through as-is (duplicates that
    already exist there are not collapsed).

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        A new list of chunk IDs; none of the input lists is modified.

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-2', 'chunk-3', 'chunk-4']
    """
    old_ids = set(old_chunk_ids)
    # Only IDs dropped by the edit are removed; IDs kept by the edit stay put.
    chunks_to_remove = old_ids - set(new_chunk_ids)

    # Step 1: Remove chunks that are no longer referenced, preserving order.
    updated_chunk_ids = [
        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
    ]

    # Step 2: Append genuinely new chunks in the order given by new_chunk_ids.
    # A set mirrors the list contents so each membership test is O(1) instead
    # of rescanning the growing list for every candidate.
    present = set(updated_chunk_ids)
    for cid in new_chunk_ids:
        if cid not in old_ids and cid not in present:
            present.add(cid)
            updated_chunk_ids.append(cid)

    return updated_chunk_ids
2598+
2599+
25542600
def subtract_source_ids(
25552601
source_ids: Iterable[str],
25562602
ids_to_remove: Collection[str],

0 commit comments

Comments
 (0)