Skip to content

Commit a3370b0

Browse files
committed
Add chunk tracking cleanup to entity/relation deletion and creation
• Clean up chunk storage on delete
• Track chunks in create operations
• Normalize relation keys consistently
1 parent bf1897a commit a3370b0

File tree

1 file changed

+153
-35
lines changed

1 file changed

+153
-35
lines changed

lightrag/utils_graph.py

Lines changed: 153 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,24 @@
1212

1313

1414
async def adelete_by_entity(
15-
chunk_entity_relation_graph, entities_vdb, relationships_vdb, entity_name: str
15+
chunk_entity_relation_graph,
16+
entities_vdb,
17+
relationships_vdb,
18+
entity_name: str,
19+
entity_chunks_storage=None,
20+
relation_chunks_storage=None,
1621
) -> DeletionResult:
1722
"""Asynchronously delete an entity and all its relationships.
1823
24+
Also cleans up entity_chunks_storage and relation_chunks_storage to remove chunk tracking.
25+
1926
Args:
2027
chunk_entity_relation_graph: Graph storage instance
2128
entities_vdb: Vector database storage for entities
2229
relationships_vdb: Vector database storage for relationships
2330
entity_name: Name of the entity to delete
31+
entity_chunks_storage: Optional KV storage for tracking chunks that reference this entity
32+
relation_chunks_storage: Optional KV storage for tracking chunks that reference relations
2433
"""
2534
graph_db_lock = get_graph_db_lock(enable_logging=False)
2635
# Use graph database lock to ensure atomic graph and vector db operations
@@ -39,14 +48,43 @@ async def adelete_by_entity(
3948
edges = await chunk_entity_relation_graph.get_node_edges(entity_name)
4049
related_relations_count = len(edges) if edges else 0
4150

51+
# Clean up chunk tracking storages before deletion
52+
if entity_chunks_storage is not None:
53+
# Delete entity's entry from entity_chunks_storage
54+
await entity_chunks_storage.delete([entity_name])
55+
logger.info(
56+
f"Entity Delete: removed chunk tracking for `{entity_name}`"
57+
)
58+
59+
if relation_chunks_storage is not None and edges:
60+
# Delete all related relationships from relation_chunks_storage
61+
from .utils import make_relation_chunk_key
62+
63+
relation_keys_to_delete = []
64+
for src, tgt in edges:
65+
# Normalize entity order for consistent key generation
66+
normalized_src, normalized_tgt = sorted([src, tgt])
67+
storage_key = make_relation_chunk_key(normalized_src, normalized_tgt)
68+
relation_keys_to_delete.append(storage_key)
69+
70+
if relation_keys_to_delete:
71+
await relation_chunks_storage.delete(relation_keys_to_delete)
72+
logger.info(
73+
f"Entity Delete: removed chunk tracking for {len(relation_keys_to_delete)} relations"
74+
)
75+
4276
await entities_vdb.delete_entity(entity_name)
4377
await relationships_vdb.delete_entity_relation(entity_name)
4478
await chunk_entity_relation_graph.delete_node(entity_name)
4579

46-
message = f"Entity '{entity_name}' and its {related_relations_count} relationships have been deleted."
80+
message = f"Entity Delete: remove '{entity_name}' and its {related_relations_count} relations"
4781
logger.info(message)
4882
await _delete_by_entity_done(
49-
entities_vdb, relationships_vdb, chunk_entity_relation_graph
83+
entities_vdb,
84+
relationships_vdb,
85+
chunk_entity_relation_graph,
86+
entity_chunks_storage,
87+
relation_chunks_storage,
5088
)
5189
return DeletionResult(
5290
status="success",
@@ -66,17 +104,23 @@ async def adelete_by_entity(
66104

67105

68106
async def _delete_by_entity_done(
69-
entities_vdb, relationships_vdb, chunk_entity_relation_graph
107+
entities_vdb,
108+
relationships_vdb,
109+
chunk_entity_relation_graph,
110+
entity_chunks_storage=None,
111+
relation_chunks_storage=None,
70112
) -> None:
71113
"""Callback after entity deletion is complete, ensures updates are persisted"""
114+
storages = [entities_vdb, relationships_vdb, chunk_entity_relation_graph]
115+
if entity_chunks_storage is not None:
116+
storages.append(entity_chunks_storage)
117+
if relation_chunks_storage is not None:
118+
storages.append(relation_chunks_storage)
119+
72120
await asyncio.gather(
73121
*[
74122
cast(StorageNameSpace, storage_inst).index_done_callback()
75-
for storage_inst in [ # type: ignore
76-
entities_vdb,
77-
relationships_vdb,
78-
chunk_entity_relation_graph,
79-
]
123+
for storage_inst in storages # type: ignore
80124
]
81125
)
82126

@@ -86,14 +130,18 @@ async def adelete_by_relation(
86130
relationships_vdb,
87131
source_entity: str,
88132
target_entity: str,
133+
relation_chunks_storage=None,
89134
) -> DeletionResult:
90135
"""Asynchronously delete a relation between two entities.
91136
137+
Also cleans up relation_chunks_storage to remove chunk tracking.
138+
92139
Args:
93140
chunk_entity_relation_graph: Graph storage instance
94141
relationships_vdb: Vector database storage for relationships
95142
source_entity: Name of the source entity
96143
target_entity: Name of the target entity
144+
relation_chunks_storage: Optional KV storage for tracking chunks that reference this relation
97145
"""
98146
relation_str = f"{source_entity} -> {target_entity}"
99147
graph_db_lock = get_graph_db_lock(enable_logging=False)
@@ -118,6 +166,19 @@ async def adelete_by_relation(
118166
status_code=404,
119167
)
120168

169+
# Clean up chunk tracking storage before deletion
170+
if relation_chunks_storage is not None:
171+
from .utils import make_relation_chunk_key
172+
173+
# Normalize entity order for consistent key generation
174+
normalized_src, normalized_tgt = sorted([source_entity, target_entity])
175+
storage_key = make_relation_chunk_key(normalized_src, normalized_tgt)
176+
177+
await relation_chunks_storage.delete([storage_key])
178+
logger.info(
179+
f"Relation Delete: removed chunk tracking for `{source_entity}`~`{target_entity}`"
180+
)
181+
121182
# Delete relation from vector database
122183
rel_ids_to_delete = [
123184
compute_mdhash_id(source_entity + target_entity, prefix="rel-"),
@@ -131,9 +192,11 @@ async def adelete_by_relation(
131192
[(source_entity, target_entity)]
132193
)
133194

134-
message = f"Successfully deleted relation from '{source_entity}' to '{target_entity}'"
195+
message = f"Relation Delete: `{source_entity}`~`{target_entity}` deleted successfully"
135196
logger.info(message)
136-
await _delete_relation_done(relationships_vdb, chunk_entity_relation_graph)
197+
await _delete_relation_done(
198+
relationships_vdb, chunk_entity_relation_graph, relation_chunks_storage
199+
)
137200
return DeletionResult(
138201
status="success",
139202
doc_id=relation_str,
@@ -151,15 +214,18 @@ async def adelete_by_relation(
151214
)
152215

153216

154-
async def _delete_relation_done(relationships_vdb, chunk_entity_relation_graph) -> None:
217+
async def _delete_relation_done(
218+
relationships_vdb, chunk_entity_relation_graph, relation_chunks_storage=None
219+
) -> None:
155220
"""Callback after relation deletion is complete, ensures updates are persisted"""
221+
storages = [relationships_vdb, chunk_entity_relation_graph]
222+
if relation_chunks_storage is not None:
223+
storages.append(relation_chunks_storage)
224+
156225
await asyncio.gather(
157226
*[
158227
cast(StorageNameSpace, storage_inst).index_done_callback()
159-
for storage_inst in [ # type: ignore
160-
relationships_vdb,
161-
chunk_entity_relation_graph,
162-
]
228+
for storage_inst in storages # type: ignore
163229
]
164230
)
165231

@@ -506,7 +572,7 @@ async def aedit_entity(
506572
relation_chunks_storage,
507573
)
508574

509-
logger.info(f"Entity '{entity_name}' successfully updated")
575+
logger.info(f"Entity Edit: `{entity_name}` successfully updated")
510576
return await get_entity_info(
511577
chunk_entity_relation_graph,
512578
entities_vdb,
@@ -586,12 +652,14 @@ async def aedit_relation(
586652
source_entity, target_entity
587653
)
588654
# Important: First delete the old relation record from the vector database
589-
old_relation_id = compute_mdhash_id(
590-
source_entity + target_entity, prefix="rel-"
591-
)
592-
await relationships_vdb.delete([old_relation_id])
593-
logger.info(
594-
f"Deleted old relation record from vector database for relation {source_entity} -> {target_entity}"
655+
# Delete both permutations to handle relationships created before normalization
656+
rel_ids_to_delete = [
657+
compute_mdhash_id(source_entity + target_entity, prefix="rel-"),
658+
compute_mdhash_id(target_entity + source_entity, prefix="rel-"),
659+
]
660+
await relationships_vdb.delete(rel_ids_to_delete)
661+
logger.debug(
662+
f"Relation Delete: delete vdb for `{source_entity}`~`{target_entity}`"
595663
)
596664

597665
# 2. Update relation information in the graph
@@ -690,14 +758,8 @@ async def aedit_relation(
690758
}
691759
)
692760

693-
reason = (
694-
"source_id changed"
695-
if source_id_changed
696-
else "initialized from graph"
697-
)
698761
logger.info(
699-
f"Updated relation_chunks_storage for '{source_entity}' -> '{target_entity}': "
700-
f"{len(updated_chunk_ids)} chunks ({reason})"
762+
f"Relation Delete: update chunk tracking for `{source_entity}`~`{target_entity}`"
701763
)
702764

703765
# 5. Save changes
@@ -706,7 +768,7 @@ async def aedit_relation(
706768
)
707769

708770
logger.info(
709-
f"Relation from '{source_entity}' to '{target_entity}' successfully updated"
771+
f"Relation Delete: `{source_entity}`~`{target_entity}`' successfully updated"
710772
)
711773
return await get_relation_info(
712774
chunk_entity_relation_graph,
@@ -744,17 +806,22 @@ async def acreate_entity(
744806
relationships_vdb,
745807
entity_name: str,
746808
entity_data: dict[str, Any],
809+
entity_chunks_storage=None,
810+
relation_chunks_storage=None,
747811
) -> dict[str, Any]:
748812
"""Asynchronously create a new entity.
749813
750814
Creates a new entity in the knowledge graph and adds it to the vector database.
815+
Also synchronizes entity_chunks_storage to track chunk references.
751816
752817
Args:
753818
chunk_entity_relation_graph: Graph storage instance
754819
entities_vdb: Vector database storage for entities
755820
relationships_vdb: Vector database storage for relationships
756821
entity_name: Name of the new entity
757822
entity_data: Dictionary containing entity attributes, e.g. {"description": "description", "entity_type": "type"}
823+
entity_chunks_storage: Optional KV storage for tracking chunks that reference this entity
824+
relation_chunks_storage: Optional KV storage for tracking chunks that reference relations
758825
759826
Returns:
760827
Dictionary containing created entity information
@@ -805,12 +872,34 @@ async def acreate_entity(
805872
# Update vector database
806873
await entities_vdb.upsert(entity_data_for_vdb)
807874

875+
# Update entity_chunks_storage to track chunk references
876+
if entity_chunks_storage is not None:
877+
source_id = node_data.get("source_id", "")
878+
chunk_ids = [cid for cid in source_id.split(GRAPH_FIELD_SEP) if cid]
879+
880+
if chunk_ids:
881+
await entity_chunks_storage.upsert(
882+
{
883+
entity_name: {
884+
"chunk_ids": chunk_ids,
885+
"count": len(chunk_ids),
886+
}
887+
}
888+
)
889+
logger.info(
890+
f"Entity Create: tracked {len(chunk_ids)} chunks for `{entity_name}`"
891+
)
892+
808893
# Save changes
809894
await _edit_entity_done(
810-
entities_vdb, relationships_vdb, chunk_entity_relation_graph
895+
entities_vdb,
896+
relationships_vdb,
897+
chunk_entity_relation_graph,
898+
entity_chunks_storage,
899+
relation_chunks_storage,
811900
)
812901

813-
logger.info(f"Entity '{entity_name}' successfully created")
902+
logger.info(f"Entity Create: '{entity_name}' successfully created")
814903
return await get_entity_info(
815904
chunk_entity_relation_graph,
816905
entities_vdb,
@@ -829,10 +918,12 @@ async def acreate_relation(
829918
source_entity: str,
830919
target_entity: str,
831920
relation_data: dict[str, Any],
921+
relation_chunks_storage=None,
832922
) -> dict[str, Any]:
833923
"""Asynchronously create a new relation between entities.
834924
835925
Creates a new relation (edge) in the knowledge graph and adds it to the vector database.
926+
Also synchronizes relation_chunks_storage to track chunk references.
836927
837928
Args:
838929
chunk_entity_relation_graph: Graph storage instance
@@ -841,6 +932,7 @@ async def acreate_relation(
841932
source_entity: Name of the source entity
842933
target_entity: Name of the target entity
843934
relation_data: Dictionary containing relation attributes, e.g. {"description": "description", "keywords": "keywords"}
935+
relation_chunks_storage: Optional KV storage for tracking chunks that reference this relation
844936
845937
Returns:
846938
Dictionary containing created relation information
@@ -917,11 +1009,37 @@ async def acreate_relation(
9171009
# Update vector database
9181010
await relationships_vdb.upsert(relation_data_for_vdb)
9191011

1012+
# Update relation_chunks_storage to track chunk references
1013+
if relation_chunks_storage is not None:
1014+
from .utils import make_relation_chunk_key
1015+
1016+
# Normalize entity order for consistent key generation
1017+
normalized_src, normalized_tgt = sorted([source_entity, target_entity])
1018+
storage_key = make_relation_chunk_key(normalized_src, normalized_tgt)
1019+
1020+
source_id = edge_data.get("source_id", "")
1021+
chunk_ids = [cid for cid in source_id.split(GRAPH_FIELD_SEP) if cid]
1022+
1023+
if chunk_ids:
1024+
await relation_chunks_storage.upsert(
1025+
{
1026+
storage_key: {
1027+
"chunk_ids": chunk_ids,
1028+
"count": len(chunk_ids),
1029+
}
1030+
}
1031+
)
1032+
logger.info(
1033+
f"Relation Create: tracked {len(chunk_ids)} chunks for `{source_entity}`~`{target_entity}`"
1034+
)
1035+
9201036
# Save changes
921-
await _edit_relation_done(relationships_vdb, chunk_entity_relation_graph)
1037+
await _edit_relation_done(
1038+
relationships_vdb, chunk_entity_relation_graph, relation_chunks_storage
1039+
)
9221040

9231041
logger.info(
924-
f"Relation from '{source_entity}' to '{target_entity}' successfully created"
1042+
f"Relation Create: `{source_entity}`~`{target_entity}` successfully created"
9251043
)
9261044
return await get_relation_info(
9271045
chunk_entity_relation_graph,

0 commit comments

Comments
 (0)