microsoft
diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
Lines changed: 1 addition & 1 deletion
diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py
Lines changed: 10 additions & 2 deletions
diff --git a/rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py b/rdagent/scenarios/kaggle/experiment/meta_tpl/fea_share_preprocess.py
Lines changed: 8 additions & 5 deletions
diff --git a/rdagent/scenarios/kaggle/experiment/workspace.py b/rdagent/scenarios/kaggle/experiment/workspace.py
Lines changed: 1 addition & 1 deletion
diff --git a/rdagent/scenarios/kaggle/proposal/proposal.py b/rdagent/scenarios/kaggle/proposal/proposal.py
Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ def clear(self) -> None:
         """
         Clear the workspace
         """
-        shutil.rmtree(self.workspace_path)
+        shutil.rmtree(self.workspace_path, ignore_errors=True)
         self.code_dict = {}
 
     def execute(self) -> object | None:
 
@@ -7,7 +7,7 @@
 from rdagent.components.coder.factor_coder.factor import FactorTask
 from rdagent.components.runner import CachedRunner
 from rdagent.components.runner.conf import RUNNER_SETTINGS
-from rdagent.core.exception import ModelEmptyError
+from rdagent.core.exception import FactorEmptyError, ModelEmptyError
 from rdagent.core.experiment import ASpecificExp
 from rdagent.oai.llm_utils import md5_hash
 from rdagent.scenarios.kaggle.experiment.kaggle_experiment import (
@@ -41,12 +41,20 @@ class KGModelRunner(KGCachedRunner[KGModelExperiment]):
     def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
         self.build_from_SOTA(exp)
         if exp.sub_workspace_list[0].target_task.model_type == "XGBoost":
+            if exp.sub_workspace_list[0].code_dict == {}:
+                raise ModelEmptyError("No model is implemented")
             exp.experiment_workspace.inject_code(**{"model_xgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
         elif exp.sub_workspace_list[0].target_task.model_type == "RandomForest":
+            if exp.sub_workspace_list[0].code_dict == {}:
+                raise ModelEmptyError("No model is implemented")
             exp.experiment_workspace.inject_code(**{"model_rf.py": exp.sub_workspace_list[0].code_dict["model.py"]})
         elif exp.sub_workspace_list[0].target_task.model_type == "LightGBM":
+            if exp.sub_workspace_list[0].code_dict == {}:
+                raise ModelEmptyError("No model is implemented")
             exp.experiment_workspace.inject_code(**{"model_lgb.py": exp.sub_workspace_list[0].code_dict["model.py"]})
         elif exp.sub_workspace_list[0].target_task.model_type == "NN":
+            if exp.sub_workspace_list[0].code_dict == {}:
+                raise ModelEmptyError("No model is implemented")
             exp.experiment_workspace.inject_code(**{"model_nn.py": exp.sub_workspace_list[0].code_dict["model.py"]})
         if RUNNER_SETTINGS.cache_result:
             cache_hit, result = self.get_cache_result(exp)
@@ -113,7 +121,7 @@ def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
             exp.experiment_workspace.data_description.append((sub_ws.target_task.get_task_information(), feature_shape))
             current_feature_file_count += 1
         if implemented_factor_count == 0:
-            raise ModelEmptyError("No factor is implemented")
+            raise FactorEmptyError("No factor is implemented")
 
         if RUNNER_SETTINGS.cache_result:
             cache_hit, result = self.get_cache_result(exp)
 
@@ -6,13 +6,15 @@
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 
 
-def prepreprocess():
+def prepreprocess(debug_mode=False):
     """
     This method loads the data, drops the unnecessary columns, and splits it into train and validation sets.
     """
     # Load and preprocess the data
     data_df = pd.read_csv("/kaggle/input/train.csv")
-    data_df = data_df.head(1200)
+    if debug_mode:
+        data_df = data_df.sample(frac=0.1, random_state=42)
+    data_df = data_df
     data_df = data_df.drop(["id"], axis=1)
 
     X = data_df.drop(["class"], axis=1)
@@ -79,11 +81,11 @@ def preprocess_transform(X: pd.DataFrame, preprocessor):
     return X_transformed
 
 
-def preprocess_script():
+def preprocess_script(debug_mode=False):
     """
     This method applies the preprocessing steps to the training, validation, and test datasets.
     """
-    X_train, X_valid, y_train, y_valid = prepreprocess()
+    X_train, X_valid, y_train, y_valid = prepreprocess(debug_mode=debug_mode)
 
     # Fit the preprocessor on the training data
     preprocessor = preprocess_fit(X_train)
@@ -94,7 +96,8 @@ def preprocess_script():
 
     # Load and preprocess the test data
     submission_df = pd.read_csv("/kaggle/input/test.csv")
-    submission_df = submission_df.head(500)
+    if debug_mode:
+        submission_df = submission_df.sample(frac=0.1, random_state=42)
     passenger_ids = submission_df["id"]
     submission_df = submission_df.drop(["id"], axis=1)
     X_test = preprocess_transform(submission_df, preprocessor)
 
@@ -13,7 +13,7 @@
 
 from fea_share_preprocess import preprocess_script
 
-X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script()
+X_train, X_valid, y_train, y_valid, X_test, passenger_ids = preprocess_script(debug_mode=True)
 
 pickle.dump(X_train, open("X_train.pkl", "wb"))
 pickle.dump(X_valid, open("X_valid.pkl", "wb"))
 
@@ -93,7 +93,7 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
 
         context_dict = {
             "hypothesis_and_feedback": hypothesis_feedback,
-            "RAG": None,
+            "RAG": rag_content,
             "hypothesis_output_format": prompt_dict["hypothesis_output_format"],
             "hypothesis_specification": None,
         }
Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,7 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:`
`93`	`93`
`94`	`94`	`context_dict = {`
`95`	`95`	`"hypothesis_and_feedback": hypothesis_feedback,`
`96`		`- "RAG": None,`
	`96`	`+ "RAG": rag_content,`
`97`	`97`	`"hypothesis_output_format": prompt_dict["hypothesis_output_format"],`
`98`	`98`	`"hypothesis_specification": None,`
`99`	`99`	`}`