Skip to content

Commit bcba6ea

Browse files
authored
feat: add DocDev for auto-generating workspace documentation (#781)
* feat: add DocDev for auto-generating workspace documentation * fix: update markdown instructions in tpl.yaml * feat: add enable_doc_dev flag and conditionally call DocDev
1 parent ab4d71a commit bcba6ea

6 files changed

Lines changed: 99 additions & 3 deletions

File tree

‎rdagent/app/data_science/conf.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class DataScienceBasePropSetting(KaggleBasePropSetting):
3838

3939
### model dump
4040
enable_model_dump: bool = False
41+
enable_doc_dev: bool = False
4142
model_dump_check_level: Literal["medium", "high"] = "medium"
4243

4344

‎rdagent/app/data_science/loop.py‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from rdagent.components.coder.data_science.pipeline.exp import PipelineTask
1515
from rdagent.components.coder.data_science.raw_data_loader import DataLoaderCoSTEER
1616
from rdagent.components.coder.data_science.raw_data_loader.exp import DataLoaderTask
17+
from rdagent.components.coder.data_science.share.doc import DocDev
1718
from rdagent.components.coder.data_science.workflow import WorkflowCoSTEER
1819
from rdagent.components.coder.data_science.workflow.exp import WorkflowTask
1920
from rdagent.components.workflow.conf import BasePropSetting
@@ -64,6 +65,8 @@ def __init__(self, PROP_SETTING: BasePropSetting):
6465
self.pipeline_coder = PipelineCoSTEER(scen)
6566

6667
self.runner = DSCoSTEERRunner(scen)
68+
if DS_RD_SETTING.enable_doc_dev:
69+
self.docdev = DocDev(scen)
6770
# self.summarizer: Experiment2Feedback = import_class(PROP_SETTING.summarizer)(scen)
6871
# logger.log_object(self.summarizer, tag="summarizer")
6972

@@ -109,7 +112,9 @@ def running(self, prev_out: dict[str, Any]):
109112
if exp.is_ready_to_run():
110113
new_exp = self.runner.develop(exp)
111114
logger.log_object(new_exp)
112-
return new_exp
115+
exp = new_exp
116+
if DS_RD_SETTING.enable_doc_dev:
117+
self.docdev.develop(exp)
113118
return exp
114119

115120
def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""
2+
Developers concentrating on writing documents for a workspace
3+
"""
4+
5+
from rdagent.core.developer import Developer
6+
from rdagent.core.experiment import Experiment, FBWorkspace
7+
from rdagent.oai.llm_utils import APIBackend
8+
from rdagent.utils.agent.ret import MarkdownAgentOut
9+
from rdagent.utils.agent.workflow import T
10+
11+
12+
class DocDev(Developer[Experiment]):
    """
    Developer that writes documentation for an experiment's workspace.

    It sends the workspace's file list and the content of a few key files to
    the LLM and stores the returned markdown as ``README.md`` in the workspace.
    """

    def develop(self, exp: Experiment) -> None:
        """
        Generate a ``README.md`` for the workspace of ``exp``.

        Parameters
        ----------
        exp : Experiment
            The experiment whose ``experiment_workspace`` is documented. The
            README is injected into that workspace; nothing is returned.
        """
        ws: FBWorkspace = exp.experiment_workspace
        root = ws.workspace_path

        # Only regular files directly under the workspace root are listed;
        # sub-folders are not walked.
        file_li = [str(file.relative_to(root)) for file in root.iterdir() if file.is_file()]

        key_file_list = ["main.py", "scores.csv"]
        # A key file may be absent (e.g. no scores were produced); skip it
        # instead of aborting with FileNotFoundError.
        key_files = {f: (root / f).read_text() for f in key_file_list if (root / f).is_file()}

        # Render the `docdev` templates: they are the ones that consume
        # `file_li` / `key_files` (the `dump_model_eval` templates do not).
        system_prompt = T(".prompts:docdev.system").r()
        user_prompt = T(".prompts:docdev.user").r(
            file_li=file_li,
            key_files=key_files,
        )

        resp = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt, system_prompt=system_prompt
        )
        markdown = MarkdownAgentOut.extract_output(resp)
        # NOTE(review): FBWorkspace.inject_files is understood to accept files as
        # keyword arguments (name -> content); "README.md" is not a valid
        # identifier, hence the ** unpacking. Confirm against rdagent.core.experiment.
        ws.inject_files(**{"README.md": markdown})

‎rdagent/components/coder/data_science/share/prompts.yaml‎

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,36 @@ dump_model_eval:
5252
# Inference:
5353
{{scores_content_after}}
5454
55+
56+
docdev:
57+
system: |-
58+
You are a skilled developer and a Kaggle grandmaster. Your task is to create documentation for a data science solution.
59+
60+
You will be given:
61+
- a list of files in the folder.
62+
- content from some important files.
63+
64+
Please explain the trained models in the "models/" folder. The training and inference processes are detailed in the `main.py` file. The models' evaluation results are in `scores.csv`. Please respond with a markdown file that includes the following information:
65+
- Explain the purpose of each model. If some models are part of a group (like those from cross-validation), describe them together.
66+
- Provide key details for each model group:
67+
- Important training parameters
68+
- Model details
69+
- Performance of each model
70+
- Ensemble
71+
72+
{% include "rdagent.utils.agent.tpl:MarkdownOut" %}
73+
74+
user: |-
75+
--------------- The file list in the workspace ---------------
76+
{% for f in file_li %}
77+
- {{ f }}
78+
{% endfor %}
79+
80+
--------------- File content of each file ---------------
81+
{% for fname, content in key_files.items() %}
82+
File Path: {{fname}}
83+
```
84+
{{content}}
85+
```
86+
{% endfor %}
87+

‎rdagent/utils/agent/ret.py‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,20 @@ def extract_output(cls, resp: str):
3939
return resp
4040

4141

42+
class MarkdownAgentOut(AgentOut):
    """Agent output format whose payload is a markdown document wrapped in a four-backtick fence."""

    @classmethod
    def get_spec(cls):
        """Render and return the ``MarkdownOut`` format instructions."""
        spec = T(".tpl:MarkdownOut").r()
        return spec

    @classmethod
    def extract_output(cls, resp: str):
        """
        Return the text enclosed by the four-backtick ``markdown`` fence in ``resp``.

        When no such fence is found, ``resp`` is returned unchanged.
        """
        found = re.search(r".*````markdown\n(.*)\n````.*", resp, re.DOTALL)
        # A successful search always yields a truthy match object, so testing
        # against None is equivalent to a plain truthiness check.
        return resp if found is None else found.group(1)
54+
55+
4256
class BatchEditOut(AgentOut):
4357
json_mode: bool = True
4458

‎rdagent/utils/agent/tpl.yaml‎

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@ PythonAgentOut: |-
33
```Python
44
<You code>
55
```
6-
6+
7+
MarkdownOut: |-
8+
The returned content should follow the format below (please note that "````" is used to avoid conflicts with "```" inside the markdown file)
9+
````markdown
10+
<the content of markdown file>
11+
````
12+
713
BatchEditOut: |-
814
You should return an edition that applies to multiple files in a workspace in JSON.
915
Except for the model file, other files should not be renamed.
@@ -46,4 +52,4 @@ PythonBatchEditOut: |-
4652
{% if with_del %}
4753
- To explicitly remove a file, provide only `__DEL__` within the code block for that file.
4854
- To replace a file with a new one, first provide ` __DEL__` for the original file, then include a separate entry with new file name and the new code.
49-
{% endif %}
55+
{% endif %}

0 commit comments

Comments
 (0)