Skip to content

Commit 6c19ca8

Browse files
author
johnson
committed
2024-12-27_17:08
1 parent b48a418 commit 6c19ca8

File tree

3 files changed

+42
-2
lines changed

3 files changed

+42
-2
lines changed

‎examples/LightRAG_utils.py‎

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,42 @@
1616
from functools import wraps
1717
from lightrag.utils import xml_to_json
1818
from neo4j import GraphDatabase
19+
from lingua import Language, LanguageDetectorBuilder #pip install lingua-language-detector
1920
from firecrawl import FirecrawlApp #pip install firecrawl-py
2021
import fitz # PyMuPDF
2122
import tika
2223
from tika import parser as tikaParser
24+
# Local filesystem path of the bundled Tika server jar, used only for the
# existence check below.
tika_path = "/media/wac/backup/john/johnson/LightRAG/examples/tika-server.jar"
# file:// URL form of the same jar; the tika client library reads this from
# the TIKA_SERVER_JAR environment variable.
TIKA_SERVER_JAR = "file:////media/wac/backup/john/johnson/LightRAG/examples/tika-server.jar"
if not os.path.exists(tika_path):
    # Fallback to a development-machine location when the primary jar is absent.
    # NOTE(review): the fallback URL is not itself existence-checked — confirm.
    TIKA_SERVER_JAR = "file:////Users/admin/git/tika/tika-server-standard-2.9.0-bin/tika-server.jar"
os.environ['TIKA_SERVER_JAR'] = TIKA_SERVER_JAR
2729

30+
def detect_language(content):
    """Detect the language of *content* and return its lowercase English name.

    Supported languages: English, French, German, Spanish, Chinese,
    Japanese, Korean.

    Args:
        content: Text to analyze.

    Returns:
        str: One of "english", "french", "german", "spanish", "chinese",
        "japanese", "korean". Falls back to "english" when detection fails
        (lingua returns None) or yields a language outside the supported set.
    """
    language_pair = {
        Language.ENGLISH: "english",
        Language.FRENCH: "french",
        Language.GERMAN: "german",
        Language.SPANISH: "spanish",
        Language.CHINESE: "chinese",
        Language.JAPANESE: "japanese",
        Language.KOREAN: "korean",
    }
    # Building a lingua detector is expensive; cache it on the function object
    # so repeated calls reuse one instance instead of rebuilding every time.
    detector = getattr(detect_language, "_detector", None)
    if detector is None:
        # Candidate languages are derived from the mapping keys so the two
        # can never drift apart (the original kept a duplicate hand-written list).
        detector = LanguageDetectorBuilder.from_languages(*language_pair).build()
        detect_language._detector = detector
    language = detector.detect_language_of(content)
    if language not in language_pair:
        # Warn before falling back to English. (Message restored from
        # mojibake in the original source.)
        print(f"输入数据{content}被检测成未知的语言,请修改language_pair进行兼容: {language}")
    return language_pair.get(language, "english")
54+
2855
class MyFirecrawl():
2956
def __init__(self, api_key="EXAMPLE", api_url="http://127.0.0.1:3002"):
3057
"""

‎lightrag/lightrag.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ def query(self, query: str, param: QueryParam = QueryParam()):
343343
loop = always_get_an_event_loop()
344344
return loop.run_until_complete(self.aquery(query, param))
345345

346-
async def aquery(self, query: str, param: QueryParam = QueryParam()):
346+
async def aquery(self, query: str, param: QueryParam = QueryParam(), history:list = []):
347+
# history: 历史聊天对话
347348
if param.mode == "local":
348349
response = await local_query(
349350
query,
@@ -353,6 +354,7 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
353354
self.text_chunks,
354355
param,
355356
asdict(self),
357+
history
356358
)
357359
elif param.mode == "global":
358360
response = await global_query(
@@ -363,6 +365,7 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
363365
self.text_chunks,
364366
param,
365367
asdict(self),
368+
history
366369
)
367370
elif param.mode == "hybrid":
368371
response = await hybrid_query(
@@ -373,6 +376,7 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
373376
self.text_chunks,
374377
param,
375378
asdict(self),
379+
history
376380
)
377381
elif param.mode == "naive":
378382
response = await naive_query(
@@ -381,6 +385,7 @@ async def aquery(self, query: str, param: QueryParam = QueryParam()):
381385
self.text_chunks,
382386
param,
383387
asdict(self),
388+
history
384389
)
385390
else:
386391
raise ValueError(f"Unknown mode {param.mode}")

‎lightrag/operate.py‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ async def local_query(
397397
text_chunks_db: BaseKVStorage[TextChunkSchema],
398398
query_param: QueryParam,
399399
global_config: dict,
400+
history: list[dict] = [],
400401
) -> str:
401402
context = None
402403
use_model_func = global_config["llm_model_func"]
@@ -446,6 +447,7 @@ async def local_query(
446447
response = await use_model_func(
447448
query,
448449
system_prompt=sys_prompt,
450+
history_messages=history
449451
)
450452
if len(response) > len(sys_prompt):
451453
response = (
@@ -670,6 +672,7 @@ async def global_query(
670672
text_chunks_db: BaseKVStorage[TextChunkSchema],
671673
query_param: QueryParam,
672674
global_config: dict,
675+
history: list[dict] = [],
673676
) -> str:
674677
context = None
675678
use_model_func = global_config["llm_model_func"]
@@ -723,6 +726,7 @@ async def global_query(
723726
response = await use_model_func(
724727
query,
725728
system_prompt=sys_prompt,
729+
history_messages=history
726730
)
727731
if len(response) > len(sys_prompt):
728732
response = (
@@ -916,6 +920,7 @@ async def hybrid_query(
916920
text_chunks_db: BaseKVStorage[TextChunkSchema],
917921
query_param: QueryParam,
918922
global_config: dict,
923+
history: list[dict] = [],
919924
) -> str:
920925
low_level_context = None
921926
high_level_context = None
@@ -984,6 +989,7 @@ async def hybrid_query(
984989
response = await use_model_func(
985990
query,
986991
system_prompt=sys_prompt,
992+
history_messages=history
987993
)
988994
if len(response) > len(sys_prompt):
989995
response = (
@@ -1070,6 +1076,7 @@ async def naive_query(
10701076
text_chunks_db: BaseKVStorage[TextChunkSchema],
10711077
query_param: QueryParam,
10721078
global_config: dict,
1079+
history: list[dict] = [],
10731080
):
10741081
use_model_func = global_config["llm_model_func"]
10751082
results = await chunks_vdb.query(query, top_k=query_param.top_k)
@@ -1094,6 +1101,7 @@ async def naive_query(
10941101
response = await use_model_func(
10951102
query,
10961103
system_prompt=sys_prompt,
1104+
history_messages=history
10971105
)
10981106

10991107
if len(response) > len(sys_prompt):

0 commit comments

Comments
 (0)