microsoft
diff --git a/‎rdagent/app/data_science/conf.py‎
Lines changed: 1 addition & 0 deletions b/‎rdagent/app/data_science/conf.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎rdagent/app/data_science/loop.py‎
Lines changed: 6 additions & 1 deletion b/‎rdagent/app/data_science/loop.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎rdagent/components/coder/data_science/share/doc.py‎
Lines changed: 37 additions & 0 deletions b/‎rdagent/components/coder/data_science/share/doc.py‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎rdagent/components/coder/data_science/share/prompts.yaml‎
Lines changed: 33 additions & 0 deletions b/‎rdagent/components/coder/data_science/share/prompts.yaml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎rdagent/utils/agent/ret.py‎
Lines changed: 14 additions & 0 deletions b/‎rdagent/utils/agent/ret.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎rdagent/utils/agent/tpl.yaml‎
Lines changed: 8 additions & 2 deletions b/‎rdagent/utils/agent/tpl.yaml‎
Lines changed: 8 additions & 2 deletions
@@ -38,6 +38,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
 
     ### model dump
     enable_model_dump: bool = False
+    enable_doc_dev: bool = False
     model_dump_check_level: Literal["medium", "high"] = "medium"
 
 
 
@@ -14,6 +14,7 @@
 from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
 from rdagent.components.coder.data_science.raw_data_loader import DataLoaderCoSTEER
 from rdagent.components.coder.data_science.raw_data_loader.exp import DataLoaderTask
+from rdagent.components.coder.data_science.share.doc import DocDev
 from rdagent.components.coder.data_science.workflow import WorkflowCoSTEER
 from rdagent.components.coder.data_science.workflow.exp import WorkflowTask
 from rdagent.components.workflow.conf import BasePropSetting
@@ -64,6 +65,8 @@ def __init__(self, PROP_SETTING: BasePropSetting):
         self.pipeline_coder = PipelineCoSTEER(scen)
 
         self.runner = DSCoSTEERRunner(scen)
+        if DS_RD_SETTING.enable_doc_dev:
+            self.docdev = DocDev(scen)
         # self.summarizer: Experiment2Feedback = import_class(PROP_SETTING.summarizer)(scen)
         # logger.log_object(self.summarizer, tag="summarizer")
 
@@ -109,7 +112,9 @@ def running(self, prev_out: dict[str, Any]):
         if exp.is_ready_to_run():
             new_exp = self.runner.develop(exp)
             logger.log_object(new_exp)
-            return new_exp
+            exp = new_exp
+        if DS_RD_SETTING.enable_doc_dev:
+            self.docdev.develop(exp)
         return exp
 
     def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
 
@@ -0,0 +1,37 @@
+"""
+Developers concentrating on writing documents for a workspace
+"""
+
+from rdagent.core.developer import Developer
+from rdagent.core.experiment import Experiment, FBWorkspace
+from rdagent.oai.llm_utils import APIBackend
+from rdagent.utils.agent.ret import MarkdownAgentOut
+from rdagent.utils.agent.workflow import T
+
+
+class DocDev(Developer[Experiment]):
+    """
+    The developer is responsible for writing documents for a workspace.
+
+    It sends the workspace's top-level file list and the content of a few key files
+    to the LLM, then injects the returned markdown into the workspace as `README.md`.
+    """
+
+    def develop(self, exp: Experiment) -> None:
+        """
+        Write documents for the workspace.
+
+        Parameters
+        ----------
+        exp : Experiment
+            The experiment whose `experiment_workspace` will be documented.
+            The generated markdown is injected into that workspace as `README.md`.
+        """
+        ws: FBWorkspace = exp.experiment_workspace
+
+        # Only regular files directly under the workspace root are listed (no recursion).
+        file_li = [str(file.relative_to(ws.workspace_path)) for file in ws.workspace_path.iterdir() if file.is_file()]
+
+        # NOTE(review): `read_text` below will raise if one of these files is missing;
+        # presumably both always exist once the experiment has been run -- confirm.
+        key_file_list = ["main.py", "scores.csv"]
+
+        # Use the `docdev` templates: they are the ones that consume `file_li` / `key_files`
+        # and that include the markdown output spec parsed by `MarkdownAgentOut.extract_output`.
+        system_prompt = T(".prompts:docdev.system").r()
+        user_prompt = T(".prompts:docdev.user").r(
+            file_li=file_li,
+            key_files={f: (ws.workspace_path / f).read_text() for f in key_file_list},
+        )
+
+        resp = APIBackend().build_messages_and_create_chat_completion(
+            user_prompt=user_prompt, system_prompt=system_prompt
+        )
+        # Falls back to the raw response when no ````markdown fence is found.
+        markdown = MarkdownAgentOut.extract_output(resp)
+        ws.inject_files({"README.md": markdown})
@@ -52,3 +52,36 @@ dump_model_eval:
     # Inference:
     {{scores_content_after}}
 
+
+docdev:
+  system: |-
+    You are a skilled developer and a Kaggle grandmaster. Your task is to create documentation for a data science solution.
+
+    You will be given:
+    - a list of files in the folder.
+    - content from some important files.
+
+    Please explain the trained models in the "models/" folder. The training and inference processes are detailed in the `main.py` file. The models' evaluation results are in `scores.csv`. Please respond with a markdown file that includes the following information:
+    - Explain the purpose of each model. If some models are part of a group (like those from cross-validation), describe them together.
+    - Provide key details for each model group:
+      - Important training parameters
+      - Model details
+      - Performance of each model
+    - Describe how the models are ensembled, if an ensemble is used.
+
+    {% include "rdagent.utils.agent.tpl:MarkdownOut" %}
+
+  user: |-
+    --------------- The file list in the workspace ---------------
+    {% for f in file_li %}
+    - {{ f }}
+    {% endfor %}
+
+    --------------- File content of each file ---------------
+    {% for fname, content in key_files.items() %}
+    File Path: {{fname}}
+    ```
+    {{content}}
+    ```
+    {% endfor %}
+
@@ -39,6 +39,20 @@ def extract_output(cls, resp: str):
         return resp
 
 
+class MarkdownAgentOut(AgentOut):
+    """Agent output whose payload is a markdown document wrapped in a four-backtick fence."""
+
+    @classmethod
+    def get_spec(cls):
+        """Render the `MarkdownOut` template describing the expected response format."""
+        spec = T(".tpl:MarkdownOut").r()
+        return spec
+
+    @classmethod
+    def extract_output(cls, resp: str):
+        """
+        Return the text enclosed by the four-backtick `markdown` fence in `resp`.
+
+        When no such fence is present, `resp` is returned unchanged.
+        """
+        fenced = re.search(r".*````markdown\n(.*)\n````.*", resp, re.DOTALL)
+        if fenced is None:
+            return resp
+        return fenced.group(1)
+
+
 class BatchEditOut(AgentOut):
     json_mode: bool = True
 
 
@@ -3,7 +3,13 @@ PythonAgentOut: |-
   ```Python
   <You code>
   ```
-  
+
+MarkdownOut: |-
+  The return content should be like the format below (Please note that "````" is used to avoid conflicts with "```" in the markdown file)
+  ````markdown
+  <the content of markdown file>
+  ````
+
 BatchEditOut: |-
   You should return an edition that applies to multiple files in a workspace in JSON.
   Except for the model file, other files should not be renamed.
@@ -46,4 +52,4 @@ PythonBatchEditOut: |-
   {% if with_del %}
   - To explicitly remove a file, provide only `__DEL__` within the code block for that file.
   - To replace a file with a new one, first provide ` __DEL__` for the original file, then include a separate entry with new file name and the new code.
-  {% endif %}
+  {% endif %}