Skip to content

Commit 8a564ec

Browse files
authored
fix: fix some bugs in the entire loop (#274)
* fix some bugs in the entire loop
* refine the code
1 parent 140fdcc commit 8a564ec

5 files changed

Lines changed: 21 additions & 10 deletions

File tree

‎rdagent/core/experiment.py‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,7 @@ def clear(self) -> None:
155155
"""
156156
Clear the workspace
157157
"""
158-
shutil.rmtree(self.workspace_path)
158+
shutil.rmtree(self.workspace_path, ignore_errors=True)
159159
self.code_dict = {}
160160

161161
def execute(self) -> object | None:

‎rdagent/scenarios/kaggle/developer/runner.py‎

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from rdagent.components.coder.factor_coder.factor import FactorTask
88
from rdagent.components.runner import CachedRunner
99
from rdagent.components.runner.conf import RUNNER_SETTINGS
10-
from rdagent.core.exception import ModelEmptyError
10+
from rdagent.core.exception import FactorEmptyError, ModelEmptyError
1111
from rdagent.core.experiment import ASpecificExp
1212
from rdagent.oai.llm_utils import md5_hash
1313
from rdagent.scenarios.kaggle.experiment.kaggle_experiment import (
@@ -41,12 +41,20 @@ class KGModelRunner(KGCachedRunner[KGModelExperiment]):
4141
def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
4242
self.build_from_SOTA(exp)
4343
if exp.sub_workspace_list[0].target_task.model_type == "XGBoost":
44+
if exp.sub_workspace_list[0].code_dict == {}:
45+
raise ModelEmptyError("No model is implemented")
4446
exp.experiment_workspace.inject_code(**{"model_xgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
4547
elif exp.sub_workspace_list[0].target_task.model_type == "RandomForest":
48+
if exp.sub_workspace_list[0].code_dict == {}:
49+
raise ModelEmptyError("No model is implemented")
4650
exp.experiment_workspace.inject_code(**{"model_rf.py": exp.sub_workspace_list[0].code_dict["model.py"]})
4751
elif exp.sub_workspace_list[0].target_task.model_type == "LightGBM":
52+
if exp.sub_workspace_list[0].code_dict == {}:
53+
raise ModelEmptyError("No model is implemented")
4854
exp.experiment_workspace.inject_code(**{"model_lgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
4955
elif exp.sub_workspace_list[0].target_task.model_type == "NN":
56+
if exp.sub_workspace_list[0].code_dict == {}:
57+
raise ModelEmptyError("No model is implemented")
5058
exp.experiment_workspace.inject_code(**{"model_nn.py": exp.sub_workspace_list[0].code_dict["model.py"]})
5159
if RUNNER_SETTINGS.cache_result:
5260
cache_hit, result = self.get_cache_result(exp)
@@ -113,7 +121,7 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
113121
exp.experiment_workspace.data_description.append((sub_ws.target_task.get_task_information(), feature_shape))
114122
current_feature_file_count += 1
115123
if implemented_factor_count == 0:
116-
raise ModelEmptyError("No factor is implemented")
124+
raise FactorEmptyError("No factor is implemented")
117125

118126
if RUNNER_SETTINGS.cache_result:
119127
cache_hit, result = self.get_cache_result(exp)

‎rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py‎

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
77

88

9-
def prepreprocess():
9+
def prepreprocess(debug_mode=False):
1010
"""
1111
This method loads the data, drops the unnecessary columns, and splits it into train and validation sets.
1212
"""
1313
# Load and preprocess the data
1414
data_df = pd.read_csv("/kaggle/input/train.csv")
15-
data_df = data_df.head(1200)
15+
if debug_mode:
16+
data_df = data_df.sample(frac=0.1, random_state=42)
17+
data_df = data_df
1618
data_df = data_df.drop(["id"], axis=1)
1719

1820
X = data_df.drop(["class"], axis=1)
@@ -79,11 +81,11 @@ def preprocess_transform(X: pd.DataFrame, preprocessor):
7981
return X_transformed
8082

8183

82-
def preprocess_script():
84+
def preprocess_script(debug_mode=False):
8385
"""
8486
This method applies the preprocessing steps to the training, validation, and test datasets.
8587
"""
86-
X_train, X_valid, y_train, y_valid = prepreprocess()
88+
X_train, X_valid, y_train, y_valid = prepreprocess(debug_mode=debug_mode)
8789

8890
# Fit the preprocessor on the training data
8991
preprocessor = preprocess_fit(X_train)
@@ -94,7 +96,8 @@ def preprocess_script():
9496

9597
# Load and preprocess the test data
9698
submission_df = pd.read_csv("/kaggle/input/test.csv")
97-
submission_df = submission_df.head(500)
99+
if debug_mode:
100+
data_df = data_df.sample(frac=0.1, random_state=42)
98101
passenger_ids = submission_df["id"]
99102
submission_df = submission_df.drop(["id"], axis=1)
100103
X_test = preprocess_transform(submission_df, preprocessor)

‎rdagent/scenarios/kaggle/experiment/workspace.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
1414
from fea_share_preprocess import preprocess_script
1515
16-
X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script()
16+
X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script(debug_mode=True)
1717
1818
pickle.dump(X_train, open("X_train.pkl", "wb"))
1919
pickle.dump(X_valid, open("X_valid.pkl", "wb"))

‎rdagent/scenarios/kaggle/proposal/proposal.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
9393

9494
context_dict = {
9595
"hypothesis_and_feedback": hypothesis_feedback,
96-
"RAG": None,
96+
"RAG": rag_content,
9797
"hypothesis_output_format": prompt_dict["hypothesis_output_format"],
9898
"hypothesis_specification": None,
9999
}

0 commit comments

Comments
 (0)