From a783c95686d8b86e180725e26d7a49385afcdaa1 Mon Sep 17 00:00:00 2001 From: zerolee <464806884@qq.com> Date: Mon, 28 Oct 2024 21:03:56 +0800 Subject: [PATCH 01/13] feat: demo2 --- package.json | 1 + pnpm-lock.yaml | 8 + src/DataInterpreter/index.md | 5 +- src/components/demo2/datas/credit-g/tree.json | 262 +++++++++++ .../demo2/datas/credit-g/tree_01.json | 24 + .../demo2/datas/credit-g/tree_02.json | 79 ++++ .../demo2/datas/credit-g/tree_03.json | 134 ++++++ .../demo2/datas/credit-g/tree_04.json | 145 ++++++ .../demo2/datas/credit-g/tree_05.json | 145 ++++++ .../demo2/datas/credit-g/tree_06.json | 200 ++++++++ .../demo2/datas/credit-g/tree_07.json | 211 +++++++++ .../demo2/datas/credit-g/tree_08.json | 222 +++++++++ .../demo2/datas/credit-g/tree_09.json | 277 ++++++++++++ .../demo2/datas/credit-g/tree_10.json | 288 ++++++++++++ src/components/demo2/demo2.vue | 427 ++++++++++++++++++ src/components/demoList.vue | 2 +- 16 files changed, 2427 insertions(+), 3 deletions(-) create mode 100644 src/components/demo2/datas/credit-g/tree.json create mode 100644 src/components/demo2/datas/credit-g/tree_01.json create mode 100644 src/components/demo2/datas/credit-g/tree_02.json create mode 100644 src/components/demo2/datas/credit-g/tree_03.json create mode 100644 src/components/demo2/datas/credit-g/tree_04.json create mode 100644 src/components/demo2/datas/credit-g/tree_05.json create mode 100644 src/components/demo2/datas/credit-g/tree_06.json create mode 100644 src/components/demo2/datas/credit-g/tree_07.json create mode 100644 src/components/demo2/datas/credit-g/tree_08.json create mode 100644 src/components/demo2/datas/credit-g/tree_09.json create mode 100644 src/components/demo2/datas/credit-g/tree_10.json create mode 100644 src/components/demo2/demo2.vue diff --git a/package.json b/package.json index ae3a2e92..c0c2774a 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "@vueuse/core": "^10.6.1", "dayjs": "^1.11.10", "execa": "^8.0.1", + "highlight.js": "^11.10.0", "lint-staged": "^15.0.2", "prettier": "^3.0.3", "sass": "^1.71.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 30890144..48ac2ccf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -35,6 +35,9 @@ dependencies: execa: specifier: ^8.0.1 version: 8.0.1 + highlight.js: + specifier: ^11.10.0 + version: 11.10.0 lint-staged: specifier: ^15.0.2 version: 15.0.2 @@ -2012,6 +2015,11 @@ packages: function-bind: 1.1.2 dev: false + /highlight.js@11.10.0: + resolution: {integrity: sha512-SYVnVFswQER+zu1laSya563s+F8VDGt7o35d4utbamowvUNLLMovFqwCLSocpZTz3MgaSRA1IbqRWZv97dtErQ==} + engines: {node: '>=12.0.0'} + dev: false + /html-tags@3.3.1: resolution: {integrity: sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ==} engines: {node: '>=8'} diff --git a/src/DataInterpreter/index.md b/src/DataInterpreter/index.md index a98a6681..d84a5413 100644 --- a/src/DataInterpreter/index.md +++ b/src/DataInterpreter/index.md @@ -4,7 +4,8 @@ footer: false --- - + diff --git a/src/components/demo2/datas/credit-g/tree.json b/src/components/demo2/datas/credit-g/tree.json new file mode 100644 index 00000000..07f9c134 --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree.json @@ -0,0 +1,262 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling 
missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n" + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = 
train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n" + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "" + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "" + }, + { + "id": "0-0-2", + "parent_id": "0-0", + "avg_score": "81.8", + "dev_score": "81.8", + "visits": 1, + "order": 7, + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Prepare data for modeling\nX_train = train_df_processed.drop(columns=['class'])\ny_train = train_df_processed['class']\nX_dev = dev_df_processed.drop(columns=['class'])\ny_dev = dev_df_processed['class']\nX_test = test_df_processed\n\n# Define base models\nbase_models = [\n ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, learning_rate=0.1, max_depth=5)),\n ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('knn', KNeighborsClassifier(n_neighbors=5))\n]\n\n# Define meta-model\nmeta_model = LogisticRegression()\n\n# Create stacking ensemble model\nstacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)\n\n# Train the stacking model\nstacking_model.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = stacking_model.predict(X_dev)\n\n# Predict on test set\ny_test_pred = stacking_model.predict(X_test)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, stacking_model.predict(X_train))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n" 
+ }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "" + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "" + }, + { + "id": "0-1", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 1, + "order": 8, + "instruction": "Analyze the distribution of the target variable to check for class imbalance and decide on appropriate sampling techniques if necessary, before engineering features by creating new columns or transforming existing ones to improve model performance.", + "code": "from sklearn.decomposition import PCA\nfrom sklearn.preprocessing import PolynomialFeatures\n\ndef feature_engineering(df):\n df_copy = df.copy()\n \n # Remove the target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Apply PCA for dimensionality reduction\n pca = PCA(n_components=0.95) # Keep 95% of the variance\n pca_features = pca.fit_transform(df_copy)\n pca_df = pd.DataFrame(pca_features, columns=[f'PCA_{i}' for i in range(pca_features.shape[1])])\n \n # Apply PolynomialFeatures for interaction terms\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n poly_df = pd.DataFrame(poly_features, columns=poly_columns)\n \n # Combine PCA and Polynomial features\n combined_df = pd.concat([pca_df, poly_df], axis=1)\n \n # Re-add the target column if it was removed\n if y is not None:\n combined_df['class'] = y\n \n return combined_df\n\n# Apply feature engineering to train, dev, and test sets\ntrain_df_engineered = feature_engineering(train_df_processed)\ndev_df_engineered = feature_engineering(dev_df_processed)\ntest_df_engineered = feature_engineering(test_df_processed)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n" + }, + { + "id": "0-2", + "parent_id": "0", + "avg_score": "80.9", + "dev_score": "82.9", + "visits": 2, + "order": 9, + "instruction": "Visualize the distribution of numerical features to identify outliers and skewed distributions, then engineer features by creating new columns or transforming existing ones to improve model performance.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, 
poly\n\ntrain_df_engineered, poly = engineer_features(train_df_processed)\ndev_df_engineered, _ = engineer_features(dev_df_processed, poly)\ntest_df_engineered, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n" + }, + { + "id": "0-2-0", + "parent_id": "0-2", + "avg_score": "78.9", + "dev_score": "78.9", + "visits": 1, + "order": 10, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "import numpy as np\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.feature_selection import SelectFromModel\nimport pandas as pd\nfrom sklearn.preprocessing import StandardScaler, PolynomialFeatures\n\n# Define the preprocess_data function\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n if y is not None:\n df_copy['class'] = y\n return df_copy, scaler\n\n# Define the engineer_features function\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n df_copy = df_copy.select_dtypes(include=['number'])\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n if y is not None:\n df_poly['class'] = y\n return df_poly, poly\n\n# Load the datasets if they are not already loaded\nif 'train_df_engineered' not in locals():\n train_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\n train_df_engineered = preprocess_data(train_df)[0]\n train_df_engineered, _ = engineer_features(train_df_engineered)\n\nif 'dev_df_engineered' not in locals():\n dev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n dev_df_engineered = preprocess_data(dev_df)[0]\n dev_df_engineered, _ = engineer_features(dev_df_engineered)\n\nif 'test_df_engineered' not in locals():\n test_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\n test_df_engineered = preprocess_data(test_df)[0]\n test_df_engineered, _ = engineer_features(test_df_engineered)\n\n# Function to optimize data types\ndef optimize_dtypes(df):\n for col in df.columns:\n if df[col].dtype == 'float64':\n df[col] = df[col].astype('float32')\n elif df[col].dtype == 'int64':\n df[col] = df[col].astype('int32')\n return df\n\n# Optimize data types for train, dev, and test datasets\ntrain_df_engineered = optimize_dtypes(train_df_engineered)\ndev_df_engineered = optimize_dtypes(dev_df_engineered)\ntest_df_engineered = optimize_dtypes(test_df_engineered)\n\n# Prepare data for modeling\nX_train = train_df_engineered.drop(columns=['class'])\ny_train = train_df_engineered['class']\nX_dev = dev_df_engineered.drop(columns=['class'])\ny_dev = dev_df_engineered['class']\nX_test = test_df_engineered\n\n# Feature selection using SelectFromModel\nbase_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')\nselector = SelectFromModel(base_model, 
threshold='median')\nselector.fit(X_train, y_train)\n\nX_train_selected = selector.transform(X_train)\nX_dev_selected = selector.transform(X_dev)\nX_test_selected = selector.transform(X_test)\n\n# Define the final model\nfinal_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=5)\n\n# Train the final model\nfinal_model.fit(X_train_selected, y_train)\n\n# Predict on dev set\ny_dev_pred = final_model.predict(X_dev_selected)\n\n# Predict on test set\ny_test_pred = final_model.predict(X_test_selected)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, final_model.predict(X_train_selected))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n" + }, + { + "id": "0-2-1", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Before training, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset.", + "code": "" + }, + { + "id": "0-2-2", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "" + }, + { + "id": "0-2-3", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "" + }, + { + "id": "0-2-4", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "" + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. 
Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n" + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "" + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "" + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "" + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. 
Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n" + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "" + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "" + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n" + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "" + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "" + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "" + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "" + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_01.json b/src/components/demo2/datas/credit-g/tree_01.json new file mode 100644 index 00000000..66cbf7da --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_01.json @@ -0,0 +1,24 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about 
the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_02.json b/src/components/demo2/datas/credit-g/tree_02.json new file mode 100644 index 00000000..0c01ebaf --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_02.json @@ -0,0 +1,79 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev 
Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. 
Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": true + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_03.json b/src/components/demo2/datas/credit-g/tree_03.json new file mode 100644 index 00000000..0f0cc77f --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_03.json @@ -0,0 +1,134 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = 
df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": true + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": true + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_04.json b/src/components/demo2/datas/credit-g/tree_04.json new file mode 100644 index 00000000..85db669d --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_04.json @@ -0,0 +1,145 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train 
Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": true + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": true + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": true + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_05.json b/src/components/demo2/datas/credit-g/tree_05.json new file mode 100644 index 00000000..9d37c727 --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_05.json @@ -0,0 +1,145 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": false + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. 
Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_06.json b/src/components/demo2/datas/credit-g/tree_06.json new file mode 100644 index 00000000..95cd19e7 --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_06.json @@ -0,0 +1,200 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n", + "active": true + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "", + "active": false + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. 
If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_07.json b/src/components/demo2/datas/credit-g/tree_07.json new file mode 100644 index 00000000..80efa243 --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_07.json @@ -0,0 +1,211 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n", + "active": true + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "", + "active": false + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. 
If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-0-2", + "parent_id": "0-0", + "avg_score": "81.8", + "dev_score": "81.8", + "visits": 1, + "order": 7, + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Prepare data for modeling\nX_train = train_df_processed.drop(columns=['class'])\ny_train = train_df_processed['class']\nX_dev = dev_df_processed.drop(columns=['class'])\ny_dev = dev_df_processed['class']\nX_test = test_df_processed\n\n# Define base models\nbase_models = [\n ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, learning_rate=0.1, max_depth=5)),\n ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('knn', KNeighborsClassifier(n_neighbors=5))\n]\n\n# Define meta-model\nmeta_model = LogisticRegression()\n\n# Create stacking ensemble model\nstacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)\n\n# Train the stacking model\nstacking_model.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = stacking_model.predict(X_dev)\n\n# Predict on test set\ny_test_pred = stacking_model.predict(X_test)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, stacking_model.predict(X_train))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n", + "active": true + }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. 
Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. 
Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_08.json b/src/components/demo2/datas/credit-g/tree_08.json new file mode 100644 index 00000000..fbf0cf7a --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_08.json @@ -0,0 +1,222 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n", + "active": false + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "", + "active": false + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. 
If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-0-2", + "parent_id": "0-0", + "avg_score": "81.8", + "dev_score": "81.8", + "visits": 1, + "order": 7, + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Prepare data for modeling\nX_train = train_df_processed.drop(columns=['class'])\ny_train = train_df_processed['class']\nX_dev = dev_df_processed.drop(columns=['class'])\ny_dev = dev_df_processed['class']\nX_test = test_df_processed\n\n# Define base models\nbase_models = [\n ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, learning_rate=0.1, max_depth=5)),\n ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('knn', KNeighborsClassifier(n_neighbors=5))\n]\n\n# Define meta-model\nmeta_model = LogisticRegression()\n\n# Create stacking ensemble model\nstacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)\n\n# Train the stacking model\nstacking_model.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = stacking_model.predict(X_dev)\n\n# Predict on test set\ny_test_pred = stacking_model.predict(X_test)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, stacking_model.predict(X_train))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-1", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 1, + "order": 8, + "instruction": "Analyze the distribution of the target variable to check for class imbalance and decide on appropriate sampling techniques if necessary, before engineering features by creating new columns or transforming existing ones to improve model 
performance.", + "code": "from sklearn.decomposition import PCA\nfrom sklearn.preprocessing import PolynomialFeatures\n\ndef feature_engineering(df):\n df_copy = df.copy()\n \n # Remove the target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Apply PCA for dimensionality reduction\n pca = PCA(n_components=0.95) # Keep 95% of the variance\n pca_features = pca.fit_transform(df_copy)\n pca_df = pd.DataFrame(pca_features, columns=[f'PCA_{i}' for i in range(pca_features.shape[1])])\n \n # Apply PolynomialFeatures for interaction terms\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n poly_df = pd.DataFrame(poly_features, columns=poly_columns)\n \n # Combine PCA and Polynomial features\n combined_df = pd.concat([pca_df, poly_df], axis=1)\n \n # Re-add the target column if it was removed\n if y is not None:\n combined_df['class'] = y\n \n return combined_df\n\n# Apply feature engineering to train, dev, and test sets\ntrain_df_engineered = feature_engineering(train_df_processed)\ndev_df_engineered = feature_engineering(dev_df_processed)\ntest_df_engineered = feature_engineering(test_df_processed)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n", + "active": true + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_09.json b/src/components/demo2/datas/credit-g/tree_09.json new file mode 100644 index 00000000..817809ac --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_09.json @@ -0,0 +1,277 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n", + "active": false + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "", + "active": false + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. 
If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-0-2", + "parent_id": "0-0", + "avg_score": "81.8", + "dev_score": "81.8", + "visits": 1, + "order": 7, + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Prepare data for modeling\nX_train = train_df_processed.drop(columns=['class'])\ny_train = train_df_processed['class']\nX_dev = dev_df_processed.drop(columns=['class'])\ny_dev = dev_df_processed['class']\nX_test = test_df_processed\n\n# Define base models\nbase_models = [\n ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, learning_rate=0.1, max_depth=5)),\n ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('knn', KNeighborsClassifier(n_neighbors=5))\n]\n\n# Define meta-model\nmeta_model = LogisticRegression()\n\n# Create stacking ensemble model\nstacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)\n\n# Train the stacking model\nstacking_model.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = stacking_model.predict(X_dev)\n\n# Predict on test set\ny_test_pred = stacking_model.predict(X_test)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, stacking_model.predict(X_train))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-1", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 1, + "order": 8, + "instruction": "Analyze the distribution of the target variable to check for class imbalance and decide on appropriate sampling techniques if necessary, before engineering features by creating new columns or transforming existing ones to improve model 
performance.", + "code": "from sklearn.decomposition import PCA\nfrom sklearn.preprocessing import PolynomialFeatures\n\ndef feature_engineering(df):\n df_copy = df.copy()\n \n # Remove the target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Apply PCA for dimensionality reduction\n pca = PCA(n_components=0.95) # Keep 95% of the variance\n pca_features = pca.fit_transform(df_copy)\n pca_df = pd.DataFrame(pca_features, columns=[f'PCA_{i}' for i in range(pca_features.shape[1])])\n \n # Apply PolynomialFeatures for interaction terms\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n poly_df = pd.DataFrame(poly_features, columns=poly_columns)\n \n # Combine PCA and Polynomial features\n combined_df = pd.concat([pca_df, poly_df], axis=1)\n \n # Re-add the target column if it was removed\n if y is not None:\n combined_df['class'] = y\n \n return combined_df\n\n# Apply feature engineering to train, dev, and test sets\ntrain_df_engineered = feature_engineering(train_df_processed)\ndev_df_engineered = feature_engineering(dev_df_processed)\ntest_df_engineered = feature_engineering(test_df_processed)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n", + "active": false + }, + { + "id": "0-2", + "parent_id": "0", + "avg_score": "80.9", + "dev_score": "82.9", + "visits": 2, + "order": 9, + "instruction": "Visualize the distribution of numerical features to identify outliers and skewed distributions, then engineer features by creating new columns or transforming existing ones to improve model performance.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_engineered, poly = engineer_features(train_df_processed)\ndev_df_engineered, _ = engineer_features(dev_df_processed, poly)\ntest_df_engineered, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n", + "active": true + }, + { + "id": "0-2-1", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Before training, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset.", + "code": "", + "active": false + }, + { + "id": "0-2-2", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-2-3", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-2-4", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. 
Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/datas/credit-g/tree_10.json b/src/components/demo2/datas/credit-g/tree_10.json new file mode 100644 index 00000000..19c970b7 --- /dev/null +++ b/src/components/demo2/datas/credit-g/tree_10.json @@ -0,0 +1,288 @@ +[ + { + "id": "0", + "parent_id": null, + "avg_score": "73.7", + "dev_score": "83.4", + "visits": 10, + "order": 1, + "instruction": "Perform exploratory data analysis on the train and dev sets to understand the dataset structure and characteristics.\n\nPreprocess the train, dev, and test sets by handling missing values, encoding categorical variables, and scaling numerical features.", + "code": "import pandas as pd\nimport numpy as np\n\n# Load the dataset\ntrain_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\ndev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n\n# Display basic information about the datasets\nprint(\"Train Dataset Info:\")\nprint(train_df.info())\nprint(\"\\nDev Dataset Info:\")\nprint(dev_df.info())\n\n# Display summary statistics for numerical columns\nprint(\"\\nTrain Dataset Summary Statistics:\")\nprint(train_df.describe())\nprint(\"\\nDev Dataset Summary Statistics:\")\nprint(dev_df.describe())\n\n# Display the number of unique values for categorical columns\ncategorical_columns = train_df.select_dtypes(include=['object', 'category']).columns\nprint(\"\\nUnique Values in Categorical Columns:\")\nfor col in categorical_columns:\n print(f\"{col}: {train_df[col].nunique()} unique values\")\n\n# Check for missing values\nprint(\"\\nMissing Values in Train Dataset:\")\nprint(train_df.isnull().sum())\nprint(\"\\nMissing Values in Dev Dataset:\")\nprint(dev_df.isnull().sum())\n\n# Check the distribution of the target column\nprint(\"\\nTarget Column Distribution in Train Dataset:\")\nprint(train_df['class'].value_counts(normalize=True))\nprint(\"\\nTarget Column Distribution in Dev Dataset:\")\nprint(dev_df['class'].value_counts(normalize=True))\n\n\nfrom sklearn.preprocessing import StandardScaler\n\n# Function to preprocess data\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n \n # Separate target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Scale numerical features\n if scaler is None:\n scaler = StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n \n # Reattach target column if it was separated\n if y is not None:\n df_copy['class'] = y\n \n return df_copy, scaler\n\n# Preprocess train, dev, and 
test sets\ntrain_df_processed, scaler = preprocess_data(train_df)\ndev_df_processed, _ = preprocess_data(dev_df, scaler)\ntest_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\ntest_df_processed, _ = preprocess_data(test_df, scaler)\n\n# Display the first few rows of the processed train dataset\nprint(\"Processed Train Dataset:\")\nprint(train_df_processed.head())\n", + "active": true + }, + { + "id": "0-0", + "parent_id": "0", + "avg_score": "82.5", + "dev_score": "83.2", + "visits": 2, + "order": 6, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before engineering new features or transforming existing ones to improve model performance.", + "code": "import numpy as np\n\n# Function to perform correlation analysis and identify highly correlated features\ndef correlation_analysis(df):\n corr_matrix = df.corr()\n upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]\n return to_drop\n\n# Perform correlation analysis on the processed train dataset\nto_drop = correlation_analysis(train_df_processed.drop(columns=['class']))\n\n# Print the features to drop due to high correlation\nprint(\"Features to drop due to high correlation:\", to_drop)\n\n# Drop the identified features from the train, dev, and test datasets\ntrain_df_processed = train_df_processed.drop(columns=to_drop)\ndev_df_processed = dev_df_processed.drop(columns=to_drop)\ntest_df_processed = test_df_processed.drop(columns=to_drop)\n\n# Print the shape of the datasets after dropping highly correlated features\nprint(\"Shape of processed train dataset after dropping features:\", train_df_processed.shape)\nprint(\"Shape of processed dev dataset after dropping features:\", dev_df_processed.shape)\nprint(\"Shape of processed test dataset after dropping features:\", test_df_processed.shape)\n", + "active": false + }, + { + "id": "0-0-0", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "", + "active": false + }, + { + "id": "0-0-1", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable 'class' to check for class imbalance. 
If significant class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-0-2", + "parent_id": "0-0", + "avg_score": "81.8", + "dev_score": "81.8", + "visits": 1, + "order": 7, + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.ensemble import StackingClassifier\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Prepare data for modeling\nX_train = train_df_processed.drop(columns=['class'])\ny_train = train_df_processed['class']\nX_dev = dev_df_processed.drop(columns=['class'])\ny_dev = dev_df_processed['class']\nX_test = test_df_processed\n\n# Define base models\nbase_models = [\n ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, learning_rate=0.1, max_depth=5)),\n ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),\n ('knn', KNeighborsClassifier(n_neighbors=5))\n]\n\n# Define meta-model\nmeta_model = LogisticRegression()\n\n# Create stacking ensemble model\nstacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)\n\n# Train the stacking model\nstacking_model.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = stacking_model.predict(X_dev)\n\n# Predict on test set\ny_test_pred = stacking_model.predict(X_test)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, stacking_model.predict(X_train))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-0-3", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set, after analyzing the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-0-4", + "parent_id": "0-0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "6.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-1", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 1, + "order": 8, + "instruction": "Analyze the distribution of the target variable to check for class imbalance and decide on appropriate sampling techniques if necessary, before engineering features by creating new columns or transforming existing ones to improve model 
performance.", + "code": "from sklearn.decomposition import PCA\nfrom sklearn.preprocessing import PolynomialFeatures\n\ndef feature_engineering(df):\n df_copy = df.copy()\n \n # Remove the target column if it exists\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Apply PCA for dimensionality reduction\n pca = PCA(n_components=0.95) # Keep 95% of the variance\n pca_features = pca.fit_transform(df_copy)\n pca_df = pd.DataFrame(pca_features, columns=[f'PCA_{i}' for i in range(pca_features.shape[1])])\n \n # Apply PolynomialFeatures for interaction terms\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n poly_df = pd.DataFrame(poly_features, columns=poly_columns)\n \n # Combine PCA and Polynomial features\n combined_df = pd.concat([pca_df, poly_df], axis=1)\n \n # Re-add the target column if it was removed\n if y is not None:\n combined_df['class'] = y\n \n return combined_df\n\n# Apply feature engineering to train, dev, and test sets\ntrain_df_engineered = feature_engineering(train_df_processed)\ndev_df_engineered = feature_engineering(dev_df_processed)\ntest_df_engineered = feature_engineering(test_df_processed)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n", + "active": false + }, + { + "id": "0-2", + "parent_id": "0", + "avg_score": "80.9", + "dev_score": "82.9", + "visits": 2, + "order": 9, + "instruction": "Visualize the distribution of numerical features to identify outliers and skewed distributions, then engineer features by creating new columns or transforming existing ones to improve model performance.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_engineered, poly = engineer_features(train_df_processed)\ndev_df_engineered, _ = engineer_features(dev_df_processed, poly)\ntest_df_engineered, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Engineered Train Dataset:\")\nprint(train_df_engineered.head())\n", + "active": true + }, + { + "id": "0-2-0", + "parent_id": "0-2", + "avg_score": "78.9", + "dev_score": "78.9", + "visits": 1, + "order": 10, + "instruction": "Perform a correlation analysis to identify highly correlated features and potential multicollinearity issues before training a base model to predict the target column 'class' on the train set.", + "code": "import numpy as np\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\nfrom sklearn.feature_selection import SelectFromModel\nimport pandas as pd\nfrom sklearn.preprocessing import StandardScaler, PolynomialFeatures\n\n# Define the preprocess_data function\ndef preprocess_data(df, scaler=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n if scaler is None:\n scaler 
= StandardScaler()\n df_copy[df_copy.columns] = scaler.fit_transform(df_copy)\n else:\n df_copy[df_copy.columns] = scaler.transform(df_copy)\n if y is not None:\n df_copy['class'] = y\n return df_copy, scaler\n\n# Define the engineer_features function\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n df_copy = df_copy.select_dtypes(include=['number'])\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n if y is not None:\n df_poly['class'] = y\n return df_poly, poly\n\n# Load the datasets if they are not already loaded\nif 'train_df_engineered' not in locals():\n train_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_train.csv')\n train_df_engineered = preprocess_data(train_df)[0]\n train_df_engineered, _ = engineer_features(train_df_engineered)\n\nif 'dev_df_engineered' not in locals():\n dev_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_dev.csv')\n dev_df_engineered = preprocess_data(dev_df)[0]\n dev_df_engineered, _ = engineer_features(dev_df_engineered)\n\nif 'test_df_engineered' not in locals():\n test_df = pd.read_csv('/data/chiyizhou/datasets/jasmine/split_test_wo_target.csv')\n test_df_engineered = preprocess_data(test_df)[0]\n test_df_engineered, _ = engineer_features(test_df_engineered)\n\n# Function to optimize data types\ndef optimize_dtypes(df):\n for col in df.columns:\n if df[col].dtype == 'float64':\n df[col] = df[col].astype('float32')\n elif df[col].dtype == 'int64':\n df[col] = df[col].astype('int32')\n return df\n\n# Optimize data types for train, dev, and test datasets\ntrain_df_engineered = optimize_dtypes(train_df_engineered)\ndev_df_engineered = optimize_dtypes(dev_df_engineered)\ntest_df_engineered = optimize_dtypes(test_df_engineered)\n\n# Prepare data for modeling\nX_train = train_df_engineered.drop(columns=['class'])\ny_train = train_df_engineered['class']\nX_dev = dev_df_engineered.drop(columns=['class'])\ny_dev = dev_df_engineered['class']\nX_test = test_df_engineered\n\n# Feature selection using SelectFromModel\nbase_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')\nselector = SelectFromModel(base_model, threshold='median')\nselector.fit(X_train, y_train)\n\nX_train_selected = selector.transform(X_train)\nX_dev_selected = selector.transform(X_dev)\nX_test_selected = selector.transform(X_test)\n\n# Define the final model\nfinal_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=5)\n\n# Train the final model\nfinal_model.fit(X_train_selected, y_train)\n\n# Predict on dev set\ny_dev_pred = final_model.predict(X_dev_selected)\n\n# Predict on test set\ny_test_pred = final_model.predict(X_test_selected)\n\n# Save predictions\npd.DataFrame({'target': y_dev_pred}).to_csv('../workspace/jasmine/dev_predictions.csv', index=False)\npd.DataFrame({'target': y_test_pred}).to_csv('../workspace/jasmine/test_predictions.csv', index=False)\n\n# Evaluate performance\ntrain_f1 = f1_score(y_train, final_model.predict(X_train_selected))\ndev_f1 = f1_score(y_dev, y_dev_pred)\n\nprint(f\"Train F1 Score: {train_f1}\")\nprint(f\"Dev F1 Score: {dev_f1}\")\n", + "active": true + }, + { + "id": "0-2-1", + "parent_id": "0-2", + "avg_score": "0.0", + 
"dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Before training, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset.", + "code": "", + "active": false + }, + { + "id": "0-2-2", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-2-3", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "", + "active": false + }, + { + "id": "0-2-4", + "parent_id": "0-2", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "9.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-3", + "parent_id": "0", + "avg_score": "81.6", + "dev_score": "81.0", + "visits": 4, + "order": 2, + "instruction": "Analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model. 
Use this analysis to engineer new features or transform existing ones that can better capture the underlying patterns in the data.", + "code": "from sklearn.preprocessing import PolynomialFeatures\n\ndef engineer_features(df, poly=None):\n df_copy = df.copy()\n if 'class' in df_copy.columns:\n y = df_copy.pop('class')\n else:\n y = None\n \n # Remove ID columns if any\n df_copy = df_copy.select_dtypes(include=['number'])\n \n # Polynomial features\n if poly is None:\n poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)\n poly_features = poly.fit_transform(df_copy)\n else:\n poly_features = poly.transform(df_copy)\n \n poly_columns = poly.get_feature_names_out(df_copy.columns)\n df_poly = pd.DataFrame(poly_features, columns=poly_columns)\n \n if y is not None:\n df_poly['class'] = y\n \n return df_poly, poly\n\ntrain_df_poly, poly = engineer_features(train_df_processed)\ndev_df_poly, _ = engineer_features(dev_df_processed, poly)\ntest_df_poly, _ = engineer_features(test_df_processed, poly)\n\nprint(\"Processed Train Dataset with Polynomial Features:\")\nprint(train_df_poly.head())\n", + "active": false + }, + { + "id": "0-3-0", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing a correlation analysis to identify highly correlated features and address potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-1", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set. First, analyze the distribution of the target variable to check for class imbalance. If class imbalance is detected, apply appropriate sampling techniques such as oversampling the minority class or undersampling the majority class to balance the dataset before training the model.", + "code": "", + "active": false + }, + { + "id": "0-3-2", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after visualizing the distribution of numerical features to identify and handle outliers and skewed distributions.", + "code": "", + "active": false + }, + { + "id": "0-3-3", + "parent_id": "0-3", + "avg_score": "81.9", + "dev_score": "81.0", + "visits": 3, + "order": 3, + "instruction": "Train a base model to predict the target column 'class' on the train set. 
Prior to training, analyze the frequency distribution of categorical features to understand their diversity and potential impact on the model.", + "code": "from sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import LabelEncoder\nfrom sklearn.metrics import f1_score\nfrom xgboost import XGBClassifier\n\n# Label encode categorical features if any\nlabel_encoders = {}\nfor col in categorical_columns:\n le = LabelEncoder()\n train_df_poly[col] = le.fit_transform(train_df_poly[col])\n dev_df_poly[col] = le.transform(dev_df_poly[col])\n test_df_poly[col] = le.transform(test_df_poly[col])\n label_encoders[col] = le\n\n# Separate features and target\nX_train = train_df_poly.drop(columns=['class'])\ny_train = train_df_poly['class']\nX_dev = dev_df_poly.drop(columns=['class'])\ny_dev = dev_df_poly['class']\nX_test = test_df_poly\n\n# Train a base XGBoost model\nmodel = XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=200, max_depth=7, learning_rate=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict on dev set\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Predict on test set\ny_test_pred = model.predict(X_test)\n\n# Save predictions\nimport os\noutput_dir = '../workspace/jasmine'\nos.makedirs(output_dir, exist_ok=True)\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n", + "active": false + }, + { + "id": "0-3-3-0", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and perform a correlation analysis to identify highly correlated features and potential multicollinearity issues.", + "code": "", + "active": false + }, + { + "id": "0-3-3-1", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the distribution of the target variable to check for class imbalance. 
If class imbalance is detected, apply appropriate sampling techniques before re-evaluating the model.", + "code": "", + "active": false + }, + { + "id": "0-3-3-2", + "parent_id": "0-3-3", + "avg_score": "82.3", + "dev_score": "82.3", + "visits": 2, + "order": 4, + "instruction": "Evaluate the base model on the dev set, report the F1 score, and visualize the distribution of numerical features to identify outliers and skewed distributions.", + "code": "# Evaluate the base model on the dev set and report the F1 score\ny_dev_pred = model.predict(X_dev)\ndev_f1 = f1_score(y_dev, y_dev_pred)\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n\n# Save the dev set predictions\npd.DataFrame(y_dev_pred, columns=['target']).to_csv(os.path.join(output_dir, 'dev_predictions.csv'), index=False)\n\n# Save the test set predictions\ny_test_pred = model.predict(X_test)\npd.DataFrame(y_test_pred, columns=['target']).to_csv(os.path.join(output_dir, 'test_predictions.csv'), index=False)\n\n# Print the train and dev set performance\ntrain_f1 = f1_score(y_train, model.predict(X_train))\nprint(f\"Train Set F1 Score: {train_f1}\")\nprint(f\"Dev Set F1 Score: {dev_f1}\")\n", + "active": false + }, + { + "id": "0-3-3-3", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Evaluate the base model on the dev set, report the F1 score, and analyze the frequency distribution of categorical features to understand their impact on the model performance.", + "code": "", + "active": false + }, + { + "id": "0-3-3-4", + "parent_id": "0-3-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "3.5", + "instruction": "Conduct a PCA (Principal Component Analysis) on the dev set to reduce dimensionality and understand the variance captured by different components. Then, evaluate the base model on the transformed dev set and report the F1 score.", + "code": "", + "active": false + }, + { + "id": "0-3-4", + "parent_id": "0-3", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "2.5", + "instruction": "Train a base model to predict the target column 'class' on the train set after performing PCA (Principal Component Analysis) to reduce dimensionality and understand the variance captured by different components.", + "code": "", + "active": false + }, + { + "id": "0-4", + "parent_id": "0", + "avg_score": "0.0", + "dev_score": "0.0", + "visits": 0, + "order": "1.5", + "instruction": "Perform Principal Component Analysis (PCA) to reduce dimensionality and understand the variance captured by different components, then engineer features based on the most significant principal components to improve model performance.", + "code": "", + "active": false + } +] \ No newline at end of file diff --git a/src/components/demo2/demo2.vue b/src/components/demo2/demo2.vue new file mode 100644 index 00000000..60d98f73 --- /dev/null +++ b/src/components/demo2/demo2.vue @@ -0,0 +1,427 @@ + + + + + diff --git a/src/components/demoList.vue b/src/components/demoList.vue index 7351a46f..d106d5bc 100644 --- a/src/components/demoList.vue +++ b/src/components/demoList.vue @@ -39,7 +39,7 @@ >
 {{ item.project }}
 </div>
-<div
 {{ item.prompt }}
 </div>
+<div
 {{ item.prompt }}
 </div>
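
Every `tree_*.json` snapshot added above shares one node schema: `id`, `parent_id`, `avg_score`, `dev_score`, `visits`, `order`, `instruction`, `code`, and an `active` flag marking the currently highlighted chain. As a reader aid only — this is an illustrative sketch, not code from demo2.vue (whose script body is not shown in this patch), and the file path and helper names are assumptions — the flat node list can be loaded and the active path recovered like this:

```python
import json

# Hypothetical path; the demo ships several snapshots (tree_01.json ... tree_10.json).
with open("src/components/demo2/datas/credit-g/tree_10.json", encoding="utf-8") as f:
    nodes = json.load(f)

by_id = {node["id"]: node for node in nodes}

# Group children under their parent so the flat list can be walked as a tree.
children = {}
for node in nodes:
    children.setdefault(node["parent_id"], []).append(node)

def active_path(node_id="0"):
    """Follow 'active' flags from the root down, yielding the highlighted chain."""
    node = by_id[node_id]
    path = [node]
    while True:
        nxt = [c for c in children.get(node["id"], []) if c["active"]]
        if not nxt:
            return path
        node = nxt[0]
        path.append(node)

for node in active_path():
    print(node["id"], node["dev_score"], node["instruction"].split(".")[0])
```

For tree_10.json this walks the chain `0 -> 0-2 -> 0-2-0`, matching the three nodes flagged `"active": true` in that file.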
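
Several unexpanded nodes (`visits: 0`, empty `code`) only describe their step: check the target distribution for class imbalance and, if found, oversample the minority class or undersample the majority class before training. The tree never records code for that branch, so the following is a hedged sketch of what such a step typically looks like, assuming a pandas DataFrame with a binary `class` column; it is not taken from the demo data:

```python
import pandas as pd
from sklearn.utils import resample

def oversample_minority(df: pd.DataFrame, target: str = "class", seed: int = 42) -> pd.DataFrame:
    """Naive random oversampling: duplicate minority rows until classes balance."""
    counts = df[target].value_counts()
    majority_label = counts.idxmax()
    majority = df[df[target] == majority_label]
    parts = [majority]
    for label, n in counts.items():
        if label == majority_label:
            continue
        minority = df[df[target] == label]
        # Sample with replacement up to the majority-class size.
        parts.append(resample(minority, replace=True, n_samples=len(majority), random_state=seed))
    return pd.concat(parts).sample(frac=1.0, random_state=seed).reset_index(drop=True)

# Toy usage: a 9:3 imbalance becomes 9:9 after oversampling.
toy = pd.DataFrame({"x": range(12), "class": [0] * 9 + [1] * 3})
print(oversample_minority(toy)["class"].value_counts())
```

If used, this would apply to the train split only; oversampling before the train/dev split would leak duplicated rows into evaluation, inflating the dev F1 scores the tree records.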
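
Likewise, several instructions propose "visualizing the distribution of numerical features to identify and handle outliers and skewed distributions", yet the recorded code for those nodes jumps straight to modeling. A minimal text-based version of that check (plots omitted) might look like the sketch below; the function name and the 1.5×IQR outlier rule are illustrative assumptions, not part of the demo:

```python
import numpy as np
import pandas as pd

def summarize_skew_and_outliers(df: pd.DataFrame, target: str = "class") -> pd.DataFrame:
    """Report skewness and 1.5*IQR outlier counts for each numerical feature."""
    num = df.drop(columns=[target], errors="ignore").select_dtypes(include=[np.number])
    rows = []
    for col in num.columns:
        q1, q3 = num[col].quantile([0.25, 0.75])
        iqr = q3 - q1
        mask = (num[col] < q1 - 1.5 * iqr) | (num[col] > q3 + 1.5 * iqr)
        rows.append({"feature": col, "skew": num[col].skew(), "n_outliers": int(mask.sum())})
    return pd.DataFrame(rows).sort_values("n_outliers", ascending=False)

# Toy usage: the lognormal column surfaces first as skewed and outlier-heavy.
rng = np.random.default_rng(0)
toy = pd.DataFrame({"a": rng.lognormal(size=200), "b": rng.normal(size=200), "class": [0, 1] * 100})
print(summarize_skew_and_outliers(toy))
```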
From 5daf20abebb3a48a4082e373fe37455aa81ae02a Mon Sep 17 00:00:00 2001 From: liuminhui Date: Wed, 30 Oct 2024 15:02:44 +0800 Subject: [PATCH 02/13] user cases add case file link --- src/en/guide/use_cases/agent/interpreter/crawl_webpage.md | 2 ++ .../guide/use_cases/agent/interpreter/data_visualization.md | 2 +- src/en/guide/use_cases/agent/interpreter/email_summary.md | 2 ++ .../guide/use_cases/agent/interpreter/human_interaction.md | 2 ++ src/en/guide/use_cases/agent/interpreter/image_removebg.md | 2 ++ src/en/guide/use_cases/agent/interpreter/imitate_webpage.md | 2 ++ .../guide/use_cases/agent/interpreter/machine_learning.md | 2 ++ .../agent/interpreter/machine_learning_with_tools.md | 2 ++ src/en/guide/use_cases/agent/interpreter/ocr_receipt.md | 2 ++ .../agent/interpreter/solve_mathematical_problems.md | 6 ++++++ src/en/guide/use_cases/agent/interpreter/text2image.md | 2 ++ src/en/guide/use_cases/agent/receipt_assistant.md | 4 ++-- src/en/guide/use_cases/agent/tutorial_assistant.md | 6 ++++-- src/zh/guide/use_cases/agent/interpreter/crawl_webpage.md | 2 ++ .../guide/use_cases/agent/interpreter/data_visualization.md | 2 +- src/zh/guide/use_cases/agent/interpreter/email_summary.md | 2 ++ .../guide/use_cases/agent/interpreter/human_interaction.md | 2 ++ src/zh/guide/use_cases/agent/interpreter/image_removebg.md | 2 ++ src/zh/guide/use_cases/agent/interpreter/imitate_webpage.md | 2 ++ .../guide/use_cases/agent/interpreter/machine_learning.md | 2 ++ .../agent/interpreter/machine_learning_with_tools.md | 2 ++ src/zh/guide/use_cases/agent/interpreter/ocr_receipt.md | 2 ++ .../agent/interpreter/solve_mathematical_problems.md | 2 ++ src/zh/guide/use_cases/agent/interpreter/text2image.md | 2 ++ src/zh/guide/use_cases/agent/receipt_assistant.md | 4 ++-- src/zh/guide/use_cases/agent/tutorial_assistant.md | 6 ++++-- 26 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/en/guide/use_cases/agent/interpreter/crawl_webpage.md b/src/en/guide/use_cases/agent/interpreter/crawl_webpage.md index 8e3fce78..e8375422 100644 --- a/src/en/guide/use_cases/agent/interpreter/crawl_webpage.md +++ b/src/en/guide/use_cases/agent/interpreter/crawl_webpage.md @@ -12,6 +12,8 @@ Retrieve paper information containing the keywords: `multiagent` and `large lang ### Code +[examples/di/crawl_webpage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/crawl_webpage.py) + ```bash python examples/di/crawl_webpage.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/data_visualization.md b/src/en/guide/use_cases/agent/interpreter/data_visualization.md index baeec5d7..308ecbab 100644 --- a/src/en/guide/use_cases/agent/interpreter/data_visualization.md +++ b/src/en/guide/use_cases/agent/interpreter/data_visualization.md @@ -16,7 +16,7 @@ Use `DataInterpreter` to perform a simple data analysis and visualize the sklear python examples/di/data_visualization.py ``` -The code in `examples/di/data_visualization.py` is as follows: +The code in [examples/di/data_visualization.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/data_visualization.py) is as follows: ```python import asyncio diff --git a/src/en/guide/use_cases/agent/interpreter/email_summary.md b/src/en/guide/use_cases/agent/interpreter/email_summary.md index 60fe2bff..04673071 100644 --- a/src/en/guide/use_cases/agent/interpreter/email_summary.md +++ b/src/en/guide/use_cases/agent/interpreter/email_summary.md @@ -12,6 +12,8 @@ After logging into the email, display the sender and the body of the latest 5 em ### Code 
+[examples/di/email_summary.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/email_summary.py) + ```bash python examples/di/email_summary.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/human_interaction.md b/src/en/guide/use_cases/agent/interpreter/human_interaction.md index 8794bbef..f1c289d0 100644 --- a/src/en/guide/use_cases/agent/interpreter/human_interaction.md +++ b/src/en/guide/use_cases/agent/interpreter/human_interaction.md @@ -12,6 +12,8 @@ We use the same [machine learning scenario](./machine_learning.md) as an example ### Code +[examples/di/machine_learning.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning.py) + ``` python examples/di/machine_learning.py --auto_run False ``` diff --git a/src/en/guide/use_cases/agent/interpreter/image_removebg.md b/src/en/guide/use_cases/agent/interpreter/image_removebg.md index cb5cb1c0..e391dd92 100644 --- a/src/en/guide/use_cases/agent/interpreter/image_removebg.md +++ b/src/en/guide/use_cases/agent/interpreter/image_removebg.md @@ -12,6 +12,8 @@ Use `DataInterpreter` to remove background from a picture of a dog. ### Code +[examples/di/rm_image_background.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/rm_image_background.py) + ```bash python examples/di/rm_image_background.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/imitate_webpage.md b/src/en/guide/use_cases/agent/interpreter/imitate_webpage.md index d1258666..88aa7a7c 100644 --- a/src/en/guide/use_cases/agent/interpreter/imitate_webpage.md +++ b/src/en/guide/use_cases/agent/interpreter/imitate_webpage.md @@ -12,6 +12,8 @@ Given a URL or an image of a webpage, use the MetaGPT tool integrated with GPT-4 ### Code +[examples/di/imitate_webpage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/imitate_webpage.py) + ```bash python examples/di/imitate_webpage.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/machine_learning.md b/src/en/guide/use_cases/agent/interpreter/machine_learning.md index f6265985..8ebdd906 100644 --- a/src/en/guide/use_cases/agent/interpreter/machine_learning.md +++ b/src/en/guide/use_cases/agent/interpreter/machine_learning.md @@ -12,6 +12,8 @@ We use the [sklearn wine recognition dataset](https://scikit-learn.org/stable/da ### Code +[examples/di/machine_learning.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning.py) + ```bash python examples/di/machine_learning.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/machine_learning_with_tools.md b/src/en/guide/use_cases/agent/interpreter/machine_learning_with_tools.md index b47bcf91..46df8f26 100644 --- a/src/en/guide/use_cases/agent/interpreter/machine_learning_with_tools.md +++ b/src/en/guide/use_cases/agent/interpreter/machine_learning_with_tools.md @@ -8,6 +8,8 @@ Use `DataInterpreter` to model and predict the [titanic](https://www.kaggle.com/ ### Code +[examples/di/machine_learning_with_tools.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning_with_tools.py) + ```bash python examples/di/machine_learning_with_tools.py ``` diff --git a/src/en/guide/use_cases/agent/interpreter/ocr_receipt.md b/src/en/guide/use_cases/agent/interpreter/ocr_receipt.md index 33b3fecb..8cb7184d 100644 --- a/src/en/guide/use_cases/agent/interpreter/ocr_receipt.md +++ b/src/en/guide/use_cases/agent/interpreter/ocr_receipt.md @@ -14,6 +14,8 @@ Use `DataInterpreter` to perform OCR recognition on the following receipt, extra ### Code 
+[examples/di/ocr_receipt.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/ocr_receipt.py) + > Note: You need to install Paddle-related dependencies to run this example, execute > `pip install metagpt[ocr]` diff --git a/src/en/guide/use_cases/agent/interpreter/solve_mathematical_problems.md b/src/en/guide/use_cases/agent/interpreter/solve_mathematical_problems.md index 305dc54c..ea40e0e7 100644 --- a/src/en/guide/use_cases/agent/interpreter/solve_mathematical_problems.md +++ b/src/en/guide/use_cases/agent/interpreter/solve_mathematical_problems.md @@ -12,6 +12,12 @@ At a school, all 60 students play on at least one of three teams: Basketball, So ### Code +[examples/di/solve_math_problems.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/solve_math_problems.py) + +```bash +python examples/di/solve_math_problems.py +``` + ```python import asyncio diff --git a/src/en/guide/use_cases/agent/interpreter/text2image.md b/src/en/guide/use_cases/agent/interpreter/text2image.md index 58b5032b..fc25cd38 100644 --- a/src/en/guide/use_cases/agent/interpreter/text2image.md +++ b/src/en/guide/use_cases/agent/interpreter/text2image.md @@ -12,6 +12,8 @@ Use the text-to-image tool of stable diffusion to generate an image from a given ### Code +[examples/di/sd_tool_usage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/sd_tool_usage.py) + ```bash python examples/di/sd_tool_usage.py ``` diff --git a/src/en/guide/use_cases/agent/receipt_assistant.md b/src/en/guide/use_cases/agent/receipt_assistant.md index b2f2b9bc..9497728c 100644 --- a/src/en/guide/use_cases/agent/receipt_assistant.md +++ b/src/en/guide/use_cases/agent/receipt_assistant.md @@ -13,7 +13,7 @@ Supports OCR recognition of invoice files in `pdf`, `png`, `jpg`, and `zip` form ### Source Code -[GitHub Source Code](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/invoice_ocr_assistant.py) +[metagpt/roles/invoice_ocr_assistant.py](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/invoice_ocr_assistant.py) ## Role Definition @@ -292,7 +292,7 @@ Supports OCR recognition of invoice files in `pdf`, `png`, `jpg`, and `zip` form ### Execution Command Example -In the project's root directory, execute the command `python3 /examples/invoice_ocr.py`. +In the project's root directory, execute the command `python3 examples/invoice_ocr.py`. ### Execution Results diff --git a/src/en/guide/use_cases/agent/tutorial_assistant.md b/src/en/guide/use_cases/agent/tutorial_assistant.md index f205fd1e..bc769e8e 100644 --- a/src/en/guide/use_cases/agent/tutorial_assistant.md +++ b/src/en/guide/use_cases/agent/tutorial_assistant.md @@ -12,7 +12,7 @@ The design approach involves using the `LLM` (Large Language Model) to initially ### Source Code -[GitHub Source Code](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/tutorial_assistant.py) +[metagpt/roles/tutorial_assistant.py](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/tutorial_assistant.py) ## Role Definition @@ -190,7 +190,9 @@ The design approach involves using the `LLM` (Large Language Model) to initially ### Execution Command Examples -Provide corresponding execution command examples. 
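For context on the command added below: `examples/write_tutorial.py` is a small wrapper around the `TutorialAssistant` role documented above. A minimal sketch of that wrapper, assuming the example's current shape on main; treat the topic string and the `language` argument as illustrative.

```python
import asyncio

from metagpt.roles.tutorial_assistant import TutorialAssistant


async def main():
    # The assistant drafts a directory for the topic first,
    # then generates the content of each section in turn.
    topic = "Write a tutorial about MySQL"
    role = TutorialAssistant(language="Chinese")
    await role.run(topic)


if __name__ == "__main__":
    asyncio.run(main())
```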
+```bash +python examples/write_tutorial.py +``` ### Execution Results diff --git a/src/zh/guide/use_cases/agent/interpreter/crawl_webpage.md b/src/zh/guide/use_cases/agent/interpreter/crawl_webpage.md index 20235962..701d9e11 100644 --- a/src/zh/guide/use_cases/agent/interpreter/crawl_webpage.md +++ b/src/zh/guide/use_cases/agent/interpreter/crawl_webpage.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/crawl_webpage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/crawl_webpage.py) + ```bash python examples/di/crawl_webpage.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/data_visualization.md b/src/zh/guide/use_cases/agent/interpreter/data_visualization.md index eda85f14..7adeab88 100644 --- a/src/zh/guide/use_cases/agent/interpreter/data_visualization.md +++ b/src/zh/guide/use_cases/agent/interpreter/data_visualization.md @@ -16,7 +16,7 @@ python examples/di/data_visualization.py ``` -`examples/di/data_visualization.py`文件中的代码具体为: +[examples/di/data_visualization.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/data_visualization.py) 文件中的代码具体为: ```python import asyncio diff --git a/src/zh/guide/use_cases/agent/interpreter/email_summary.md b/src/zh/guide/use_cases/agent/interpreter/email_summary.md index 396e441f..14719c14 100644 --- a/src/zh/guide/use_cases/agent/interpreter/email_summary.md +++ b/src/zh/guide/use_cases/agent/interpreter/email_summary.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/email_summary.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/email_summary.py) + ```bash python examples/di/email_summary.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/human_interaction.md b/src/zh/guide/use_cases/agent/interpreter/human_interaction.md index cdb22909..2fa38c38 100644 --- a/src/zh/guide/use_cases/agent/interpreter/human_interaction.md +++ b/src/zh/guide/use_cases/agent/interpreter/human_interaction.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/machine_learning.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning.py) + ``` python examples/di/machine_learning.py --auto_run False ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/image_removebg.md b/src/zh/guide/use_cases/agent/interpreter/image_removebg.md index 7f252a5a..908d660d 100644 --- a/src/zh/guide/use_cases/agent/interpreter/image_removebg.md +++ b/src/zh/guide/use_cases/agent/interpreter/image_removebg.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/rm_image_background.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/rm_image_background.py) + ```bash python examples/di/rm_image_background.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/imitate_webpage.md b/src/zh/guide/use_cases/agent/interpreter/imitate_webpage.md index 7ef1e2c9..f5d60c79 100644 --- a/src/zh/guide/use_cases/agent/interpreter/imitate_webpage.md +++ b/src/zh/guide/use_cases/agent/interpreter/imitate_webpage.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/imitate_webpage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/imitate_webpage.py) + ```bash python examples/di/imitate_webpage.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/machine_learning.md b/src/zh/guide/use_cases/agent/interpreter/machine_learning.md index 7a68ff2e..90b88c90 100644 --- a/src/zh/guide/use_cases/agent/interpreter/machine_learning.md +++ b/src/zh/guide/use_cases/agent/interpreter/machine_learning.md @@ -12,6 +12,8 @@ ### 代码 +[examples/di/machine_learning.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning.py) + ```bash python 
examples/di/machine_learning.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/machine_learning_with_tools.md b/src/zh/guide/use_cases/agent/interpreter/machine_learning_with_tools.md index a74fd1d5..97b10850 100644 --- a/src/zh/guide/use_cases/agent/interpreter/machine_learning_with_tools.md +++ b/src/zh/guide/use_cases/agent/interpreter/machine_learning_with_tools.md @@ -8,6 +8,8 @@ ### 代码 +[examples/di/machine_learning_with_tools.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/machine_learning_with_tools.py) + ```bash python examples/di/machine_learning_with_tools.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/ocr_receipt.md b/src/zh/guide/use_cases/agent/interpreter/ocr_receipt.md index 1082d77f..7a01aa9d 100644 --- a/src/zh/guide/use_cases/agent/interpreter/ocr_receipt.md +++ b/src/zh/guide/use_cases/agent/interpreter/ocr_receipt.md @@ -14,6 +14,8 @@ OCR,是一种识别图片中文字,得到结构化文本信息的技术手 ### 代码 +[examples/di/ocr_receipt.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/ocr_receipt.py) + > 注意:你需要事先安装Paddle相关依赖以运行此示例,可执行 > `pip install metagpt[ocr]` diff --git a/src/zh/guide/use_cases/agent/interpreter/solve_mathematical_problems.md b/src/zh/guide/use_cases/agent/interpreter/solve_mathematical_problems.md index 5d0fc69e..b62e71ce 100644 --- a/src/zh/guide/use_cases/agent/interpreter/solve_mathematical_problems.md +++ b/src/zh/guide/use_cases/agent/interpreter/solve_mathematical_problems.md @@ -12,6 +12,8 @@ At a school, all 60 students play on at least one of three teams: Basketball, So ### 代码 +[examples/di/solve_math_problems.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/solve_math_problems.py) + ```bash python examples/di/solve_math_problems.py ``` diff --git a/src/zh/guide/use_cases/agent/interpreter/text2image.md b/src/zh/guide/use_cases/agent/interpreter/text2image.md index d86dc1e5..64321ebe 100644 --- a/src/zh/guide/use_cases/agent/interpreter/text2image.md +++ b/src/zh/guide/use_cases/agent/interpreter/text2image.md @@ -12,6 +12,8 @@ Text2Image,指通过文字描述获取图片 ### 代码 +[examples/di/sd_tool_usage.py](https://github.com/geekan/MetaGPT/blob/main/examples/di/sd_tool_usage.py) + ```bash python examples/di/sd_tool_usage.py ``` diff --git a/src/zh/guide/use_cases/agent/receipt_assistant.md b/src/zh/guide/use_cases/agent/receipt_assistant.md index 7c589e30..ea1b54d8 100644 --- a/src/zh/guide/use_cases/agent/receipt_assistant.md +++ b/src/zh/guide/use_cases/agent/receipt_assistant.md @@ -13,7 +13,7 @@ ### 源码 -[GitHub Source Code](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/invoice_ocr_assistant.py) +[metagpt/roles/invoice_ocr_assistant.py](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/invoice_ocr_assistant.py) ## 角色定义 @@ -301,7 +301,7 @@ ### 执行命令样例 -在项目根目录下,执行命令行 `python3 /examples/invoice_ocr.py`。 +在项目根目录下,执行命令行 `python3 examples/invoice_ocr.py`。 ### 执行结果 diff --git a/src/zh/guide/use_cases/agent/tutorial_assistant.md b/src/zh/guide/use_cases/agent/tutorial_assistant.md index edfc44aa..58a9331c 100644 --- a/src/zh/guide/use_cases/agent/tutorial_assistant.md +++ b/src/zh/guide/use_cases/agent/tutorial_assistant.md @@ -12,7 +12,7 @@ ### 源码 -[GitHub Source Code](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/tutorial_assistant.py) +[metagpt/roles/tutorial_assistant.py](https://github.com/geekan/MetaGPT/blob/main/metagpt/roles/tutorial_assistant.py) ## 角色定义 @@ -195,7 +195,9 @@ ### 执行命令样例 -贴对应的执行命令样例 +```bash +python examples/write_tutorial.py +``` ### 执行结果 From 900efa4a660d1e7484f239447c32405d2914bbd1 Mon 
Sep 17 00:00:00 2001 From: liuminhui Date: Wed, 30 Oct 2024 15:52:19 +0800 Subject: [PATCH 03/13] In-Depth Guides Optimize --- src/en/guide/in_depth_guides/breakpoint_recovery.md | 6 +++++- src/en/guide/in_depth_guides/rag_module.md | 2 +- src/zh/guide/in_depth_guides/breakpoint_recovery.md | 6 +++++- src/zh/guide/in_depth_guides/rag_module.md | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/en/guide/in_depth_guides/breakpoint_recovery.md b/src/en/guide/in_depth_guides/breakpoint_recovery.md index f64b6fcf..b94cc7f4 100644 --- a/src/en/guide/in_depth_guides/breakpoint_recovery.md +++ b/src/en/guide/in_depth_guides/breakpoint_recovery.md @@ -29,7 +29,9 @@ When the program is interrupted or terminated, the file structure in the storage team.json # Contains information such as team, environment, roles, actions, etc. ``` -Example of data summary corresponding to `team.json`. +<details>
+ +Example of data summary corresponding to team.json ```json { @@ -140,6 +142,8 @@ Example of data summary corresponding to `team.json`. } ``` +
+</details> + ### Execution order during recovery Since MetaGPT is an asynchronous execution framework, there are several typical interception points and recovery sequences as follows. diff --git a/src/en/guide/in_depth_guides/rag_module.md b/src/en/guide/in_depth_guides/rag_module.md index aeddca0a..612f3ef8 100644 --- a/src/en/guide/in_depth_guides/rag_module.md +++ b/src/en/guide/in_depth_guides/rag_module.md @@ -10,7 +10,7 @@ This article focuses on the RAG functions provided by the current MetaGPT: 4. Data update, addition of text and Python objects. 5. Data storage and recovery, vectorization is not required each time. -For more examples, please see [rag_pipeline](https://github.com/geekan/MetaGPT/blob/main/examples/rag_pipeline.py) and [rag_search](https://github.com/geekan/MetaGPT/blob/main/examples/rag_search.py) +For more examples, please see [rag_pipeline](https://github.com/geekan/MetaGPT/blob/main/examples/rag/rag_pipeline.py) and [rag_search](https://github.com/geekan/MetaGPT/blob/main/examples/rag/rag_search.py) ## Prepare diff --git a/src/zh/guide/in_depth_guides/breakpoint_recovery.md b/src/zh/guide/in_depth_guides/breakpoint_recovery.md index 7a6245d6..7cd33e5c 100644 --- a/src/zh/guide/in_depth_guides/breakpoint_recovery.md +++ b/src/zh/guide/in_depth_guides/breakpoint_recovery.md @@ -29,7 +29,9 @@ team.json # 包含团队、环境、角色、动作等信息 ``` -`team.json`对应内容的数据概要示例。 +<details>
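Because `team.json` is plain JSON, the serialized snapshot wrapped in the collapsible blocks added here can be inspected without loading MetaGPT at all. A minimal sketch, assuming the default `storage/team` layout and the top-level structure shown in the docs' example; key names may vary across MetaGPT versions.

```python
import json
from pathlib import Path

# Path assumed from the storage layout described above; adjust to your workspace.
team_file = Path("storage/team/team.json")
state = json.loads(team_file.read_text(encoding="utf-8"))

# The top-level keys summarize what was serialized
# (team context, environment, roles, pending actions, ...).
print(sorted(state))

# If roles are nested under the environment, list them by name.
for role_key in state.get("env", {}).get("roles", {}):
    print(role_key)
```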
+ +team.json 对应内容的数据概要示例。 ```json { @@ -140,6 +142,8 @@ } ``` +
+</details> + ### 恢复时的执行顺序 由于MetaGPT是异步执行框架,对于下述几种典型的中断截点和恢复顺序。 diff --git a/src/zh/guide/in_depth_guides/rag_module.md b/src/zh/guide/in_depth_guides/rag_module.md index 4fb476b7..1f21b253 100644 --- a/src/zh/guide/in_depth_guides/rag_module.md +++ b/src/zh/guide/in_depth_guides/rag_module.md @@ -10,7 +10,7 @@ RAG(Retrieval-Augmented Generation)通过引用外部权威知识库来优 4. 数据更新,增加文本与python对象 5. 数据保存及恢复,不用每次都进行向量化 -更多的例子请查看 [rag_pipeline](https://github.com/geekan/MetaGPT/blob/main/examples/rag_pipeline.py) 和 [rag_search](https://github.com/geekan/MetaGPT/blob/main/examples/rag_search.py) +更多的例子请查看 [rag_pipeline](https://github.com/geekan/MetaGPT/blob/main/examples/rag/rag_pipeline.py) 和 [rag_search](https://github.com/geekan/MetaGPT/blob/main/examples/rag/rag_search.py) ## 前置准备 From 7624b74a7ba202afa11edb14a1329f1d2b8c4779 Mon Sep 17 00:00:00 2001 From: chenshuanglong Date: Wed, 30 Oct 2024 22:29:36 +0800 Subject: [PATCH 04/13] feat: demo2 --- src/components/demo2/demo2.vue | 667 +++++++++++++++++++++++++-------- 1 file changed, 521 insertions(+), 146 deletions(-) diff --git a/src/components/demo2/demo2.vue b/src/components/demo2/demo2.vue index 60d98f73..e22329e6 100644 --- a/src/components/demo2/demo2.vue +++ b/src/components/demo2/demo2.vue @@ -1,23 +1,89 @@ +watch( + currentFolder, + async () => { + stopAutoPlay(); + index.value = 0; + activeTreeNodeId.value = null; + trees.value = await fetchTreeData(currentFolder.value); + + startAutoPlay(); + nextTick(() => { + draw(); + }); + }, + { + immediate: true, + } +); + From e032ef2b0a1ac51e4e3be42dde0a03804d032fb8 Mon Sep 17 00:00:00 2001 From: chenshuanglong Date: Wed, 30 Oct 2024 22:34:37 +0800 Subject: [PATCH 05/13] feat: demo2 --- src/components/demo2/demo2.vue | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/components/demo2/demo2.vue b/src/components/demo2/demo2.vue index e22329e6..4202c0d4 100644 --- a/src/components/demo2/demo2.vue +++ b/src/components/demo2/demo2.vue @@ -1,7 +1,6 @@
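Closing note on the `rag_module.md` links corrected in PATCH 03: the referenced `rag_pipeline`/`rag_search` examples build on MetaGPT's `SimpleEngine`. A minimal sketch of that pattern, assuming the engine API on current main and a hypothetical local file `docs/example.txt`; the RAG extra must be installed first (for example, `pip install metagpt[rag]`).

```python
import asyncio

from metagpt.rag.engines import SimpleEngine


async def main():
    # Build an in-memory retrieval engine over local documents,
    # then answer a question grounded in their contents.
    engine = SimpleEngine.from_docs(input_files=["docs/example.txt"])
    answer = await engine.aquery("What does the document say about retrieval?")
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())
```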