Skip to content

Commit b501e86

Browse files
authored
Merge branch 'HKUDS:main' into main
2 parents a950759 + 6dd778e commit b501e86

File tree

2 files changed

+55
-21
lines changed

2 files changed

+55
-21
lines changed

‎lightrag/operate.py‎

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
pack_user_ass_to_openai_messages,
1616
split_string_by_multi_markers,
1717
truncate_list_by_token_size,
18+
process_combine_contexts,
1819
)
1920
from .base import (
2021
BaseGraphStorage,
@@ -1003,35 +1004,28 @@ def extract_sections(context):
10031004
ll_entities, ll_relationships, ll_sources = extract_sections(low_level_context)
10041005

10051006
# Combine and deduplicate the entities
1006-
combined_entities_set = set(
1007-
filter(None, hl_entities.strip().split("\n") + ll_entities.strip().split("\n"))
1008-
)
1009-
combined_entities = "\n".join(combined_entities_set)
1010-
1007+
combined_entities = process_combine_contexts(hl_entities, ll_entities)
1008+
10111009
# Combine and deduplicate the relationships
1012-
combined_relationships_set = set(
1013-
filter(
1014-
None,
1015-
hl_relationships.strip().split("\n") + ll_relationships.strip().split("\n"),
1016-
)
1017-
)
1018-
combined_relationships = "\n".join(combined_relationships_set)
1010+
combined_relationships = process_combine_contexts(hl_relationships, ll_relationships)
10191011

10201012
# Combine and deduplicate the sources
1021-
combined_sources_set = set(
1022-
filter(None, hl_sources.strip().split("\n") + ll_sources.strip().split("\n"))
1023-
)
1024-
combined_sources = "\n".join(combined_sources_set)
1013+
combined_sources = process_combine_contexts(hl_sources, ll_sources)
10251014

10261015
# Format the combined context
10271016
return f"""
10281017
-----Entities-----
10291018
```csv
10301019
{combined_entities}
1020+
```
10311021
-----Relationships-----
1022+
```csv
10321023
{combined_relationships}
1024+
```
10331025
-----Sources-----
1026+
```csv
10341027
{combined_sources}
1028+
```
10351029
"""
10361030

10371031

‎lightrag/utils.py‎

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import asyncio
22
import html
3+
import io
4+
import csv
35
import json
46
import logging
57
import os
68
import re
79
from dataclasses import dataclass
810
from functools import wraps
911
from hashlib import md5
10-
from typing import Any, Union
12+
from typing import Any, Union,List
1113
import xml.etree.ElementTree as ET
1214

1315
import numpy as np
@@ -174,11 +176,17 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size:
174176
return list_data[:i]
175177
return list_data
176178

179+
def list_of_list_to_csv(data: List[List[str]]) -> str:
    """Serialize a list of rows into a CSV-formatted string.

    Each inner list becomes one CSV record. Quoting and escaping follow the
    default dialect of ``csv.writer``, so every record is terminated with
    ``"\\r\\n"``.

    Args:
        data: Rows to serialize; each row is a list of field values.

    Returns:
        The CSV text for all rows, or an empty string when ``data`` is empty.
    """
    with io.StringIO() as buffer:
        csv.writer(buffer).writerows(data)
        return buffer.getvalue()
184+
def csv_string_to_list(csv_string: str) -> List[List[str]]:
    """Parse CSV text into a list of rows.

    The text is read with the default dialect of ``csv.reader``; quoted
    fields are unquoted and every field is returned as a string.

    Args:
        csv_string: CSV-formatted text.

    Returns:
        One list of field strings per record, in input order. An empty input
        string yields an empty list.
    """
    source = io.StringIO(csv_string)
    return list(csv.reader(source))
188+
177189

178-
def list_of_list_to_csv(data: list[list]):
179-
return "\n".join(
180-
[",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data]
181-
)
182190

183191

184192
def save_data_to_file(data, file_name):
@@ -244,3 +252,35 @@ def xml_to_json(xml_file):
244252
except Exception as e:
245253
print(f"An error occurred: {e}")
246254
return None
255+
256+
def process_combine_contexts(hl: str, ll: str) -> str:
    """Merge two CSV context tables into one de-duplicated, renumbered table.

    Both inputs are stripped and parsed with ``csv_string_to_list``. The first
    row of each non-empty input is treated as its header; when both inputs
    have one, the header taken from ``ll`` is used. For every data row the
    first column is dropped, the remaining fields are joined with ``","``,
    and rows that produce the same joined text are collapsed into one. The
    surviving rows are numbered again starting at 1.

    Rows are emitted in first-seen order (rows from ``hl`` first, then rows
    from ``ll``). De-duplicating through a plain ``set`` would make the row
    order, and therefore the row numbers, depend on per-process string hash
    randomization, so identical inputs could yield different text on
    different runs.

    Args:
        hl: CSV text of the first table, header row included.
        ll: CSV text of the second table, header row included.

    Returns:
        The header joined with ``",\\t"`` followed by one line per unique row
        in the form ``"<n>,\\t<fields>"``, lines separated by ``"\\n"``.
        Returns an empty string when neither input contains any row.
    """
    rows_hl = csv_string_to_list(hl.strip())
    rows_ll = csv_string_to_list(ll.strip())

    header = None
    if rows_hl:
        header = rows_hl[0]
        rows_hl = rows_hl[1:]
    if rows_ll:
        # When both tables carry a header, the one from ``ll`` wins.
        header = rows_ll[0]
        rows_ll = rows_ll[1:]
    if header is None:
        return ""

    # Drop the leading index column; blank CSV records (empty lists) are skipped.
    # NOTE(review): fields are re-joined with a bare "," and no quoting, so a
    # field that itself contains a comma cannot be told apart afterwards.
    joined_rows = [",".join(row[1:]) for row in rows_hl + rows_ll if row]

    # dict.fromkeys de-duplicates while keeping insertion order; rows that had
    # nothing besides the index column join to "" and are discarded.
    unique_rows = [row for row in dict.fromkeys(joined_rows) if row]

    lines = [",\t".join(header)]
    lines.extend(
        f"{number},\t{row}" for number, row in enumerate(unique_rows, start=1)
    )
    return "\n".join(lines)

0 commit comments

Comments
 (0)