Skip to content

Commit 3fbd704

Browse files
committed
Enhance entity/relation editing with chunk tracking synchronization
• Add chunk storage sync to edit ops • Implement incremental chunk ID updates • Support entity renaming migrations • Normalize relation keys consistently • Preserve chunk references on edits
1 parent 11f1f36 commit 3fbd704

File tree

3 files changed

+325
-28
lines changed

3 files changed

+325
-28
lines changed

‎lightrag/lightrag.py‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3582,6 +3582,7 @@ async def aedit_entity(
35823582
"""Asynchronously edit entity information.
35833583
35843584
Updates entity information in the knowledge graph and re-embeds the entity in the vector database.
3585+
Also synchronizes entity_chunks_storage and relation_chunks_storage to track chunk references.
35853586
35863587
Args:
35873588
entity_name: Name of the entity to edit
@@ -3600,6 +3601,8 @@ async def aedit_entity(
36003601
entity_name,
36013602
updated_data,
36023603
allow_rename,
3604+
self.entity_chunks,
3605+
self.relation_chunks,
36033606
)
36043607

36053608
def edit_entity(
@@ -3616,6 +3619,7 @@ async def aedit_relation(
36163619
"""Asynchronously edit relation information.
36173620
36183621
Updates relation (edge) information in the knowledge graph and re-embeds the relation in the vector database.
3622+
Also synchronizes the relation_chunks_storage to track which chunks reference this relation.
36193623
36203624
Args:
36213625
source_entity: Name of the source entity
@@ -3634,6 +3638,7 @@ async def aedit_relation(
36343638
source_entity,
36353639
target_entity,
36363640
updated_data,
3641+
self.relation_chunks,
36373642
)
36383643

36393644
def edit_relation(

‎lightrag/utils.py‎

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,6 +2551,52 @@ def apply_source_ids_limit(
25512551
return truncated
25522552

25532553

2554+
def compute_incremental_chunk_ids(
    existing_full_chunk_ids: list[str],
    old_chunk_ids: list[str],
    new_chunk_ids: list[str],
) -> list[str]:
    """
    Compute incrementally updated chunk IDs based on changes.

    The delta between ``old_chunk_ids`` and ``new_chunk_ids`` is applied to
    ``existing_full_chunk_ids``:

    * IDs present in ``old_chunk_ids`` but not in ``new_chunk_ids`` are removed
      from the existing list.
    * IDs present in ``new_chunk_ids`` but not in ``old_chunk_ids`` are appended
      at the end, in the order they first appear in ``new_chunk_ids``, unless
      the existing list already contains them.
    * Every other existing ID keeps its original position.

    No new duplicates are introduced. Duplicates that are already present in
    ``existing_full_chunk_ids`` are left untouched. None of the input lists is
    mutated; a new list is returned.

    Args:
        existing_full_chunk_ids: Complete list of existing chunk IDs from storage
        old_chunk_ids: Previous chunk IDs from source_id (chunks being replaced)
        new_chunk_ids: New chunk IDs from updated source_id (chunks being added)

    Returns:
        New list of chunk IDs with removals and additions applied

    Example:
        >>> existing = ['chunk-1', 'chunk-2', 'chunk-3']
        >>> old = ['chunk-1', 'chunk-2']
        >>> new = ['chunk-2', 'chunk-4']
        >>> compute_incremental_chunk_ids(existing, old, new)
        ['chunk-2', 'chunk-3', 'chunk-4']
    """
    # Build each set once; both differences below reuse them.
    old_ids = set(old_chunk_ids)
    new_ids = set(new_chunk_ids)
    chunks_to_remove = old_ids - new_ids
    chunks_to_add = new_ids - old_ids

    # Step 1: Drop chunks that are no longer referenced, keeping stored order.
    updated_chunk_ids = [
        cid for cid in existing_full_chunk_ids if cid not in chunks_to_remove
    ]

    # Step 2: Append genuinely new chunks in the order given by new_chunk_ids.
    # A set mirrors the list's contents so each membership test is O(1)
    # instead of rescanning the growing list.
    present = set(updated_chunk_ids)
    for cid in new_chunk_ids:
        if cid in chunks_to_add and cid not in present:
            present.add(cid)
            updated_chunk_ids.append(cid)

    return updated_chunk_ids
2598+
2599+
25542600
def subtract_source_ids(
25552601
source_ids: Iterable[str],
25562602
ids_to_remove: Collection[str],

0 commit comments

Comments
 (0)