@@ -560,19 +560,19 @@ async def _find_most_related_text_unit_from_entities(
         if not this_edges:
             continue
         all_one_hop_nodes.update([e[1] for e in this_edges])
-
+
     all_one_hop_nodes = list(all_one_hop_nodes)
     all_one_hop_nodes_data = await asyncio.gather(
         *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
     )
-
+
     # Add null check for node data
     all_one_hop_text_units_lookup = {
         k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
         for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
         if v is not None and "source_id" in v  # Add source_id check
     }
-
+
     all_text_units_lookup = {}
     for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
         for c_id in this_text_units:
@@ -586,37 +586,36 @@ async def _find_most_related_text_unit_from_entities(
                         and c_id in all_one_hop_text_units_lookup[e[1]]
                     ):
                         relation_counts += 1
-
+
             chunk_data = await text_chunks_db.get_by_id(c_id)
             if chunk_data is not None and "content" in chunk_data:  # Add content check
                 all_text_units_lookup[c_id] = {
                     "data": chunk_data,
                     "order": index,
                     "relation_counts": relation_counts,
                 }
-
+
     # Filter out None values and ensure data has content
     all_text_units = [
-        {"id": k, **v}
-        for k, v in all_text_units_lookup.items()
+        {"id": k, **v}
+        for k, v in all_text_units_lookup.items()
         if v is not None and v.get("data") is not None and "content" in v["data"]
     ]
-
+
     if not all_text_units:
         logger.warning("No valid text units found")
         return []
-
+
     all_text_units = sorted(
-        all_text_units,
-        key=lambda x: (x["order"], -x["relation_counts"])
+        all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
     )
-
+
     all_text_units = truncate_list_by_token_size(
         all_text_units,
         key=lambda x: x["data"]["content"],
         max_token_size=query_param.max_token_for_text_unit,
     )
-
+
     all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
 
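A note on the sort in this hunk: text units are ordered by "order" ascending (the rank of the entity that surfaced the chunk) and, within the same order, by "relation_counts" descending, so chunks touched by more one-hop relations win ties. A minimal sketch with made-up entries (the ids and counts are illustrative, not from this commit):

# Hypothetical entries shaped like the {"id", "order", "relation_counts"} dicts built above.
units = [
    {"id": "c3", "order": 1, "relation_counts": 5},
    {"id": "c1", "order": 0, "relation_counts": 2},
    {"id": "c2", "order": 0, "relation_counts": 7},
]
# Ascending by order, then descending by relation_counts within each order.
ranked = sorted(units, key=lambda x: (x["order"], -x["relation_counts"]))
print([u["id"] for u in ranked])  # ['c2', 'c1', 'c3']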
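truncate_list_by_token_size is the repository's own helper and its tokenizer is not shown in this diff. A rough stand-in for what the final step does, assuming a whitespace token count purely for illustration:

# Rough stand-in for truncate_list_by_token_size: keep items in order until the
# running token total would exceed the budget. The whitespace split is an
# assumption standing in for the real tokenizer.
def truncate_by_token_budget(items, key, max_token_size):
    total, kept = 0, []
    for item in items:
        tokens = len(key(item).split())
        if total + tokens > max_token_size:
            break
        total += tokens
        kept.append(item)
    return kept

chunks = [
    {"data": {"content": "alpha beta"}},
    {"data": {"content": "gamma delta epsilon"}},
]
kept = truncate_by_token_budget(chunks, key=lambda x: x["data"]["content"], max_token_size=3)
print(len(kept))  # 1: the second chunk would push the total past the budget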