Skip to content

Commit

Permalink
cleaning up old stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Rene bidart committed Nov 21, 2017
1 parent 78af396 commit 5d904ec
Show file tree
Hide file tree
Showing 36 changed files with 858 additions and 499 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
**/.pyc
**/.ipynb_checkpoints
.ipynb_checkpoints
**/.DS_Store
**/.DS_Store?
**/.h5
Expand Down
32 changes: 32 additions & 0 deletions notebooks/explore/Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Try a CNN with 7x7 input, and then visualize these filters. Make sure to scale them to 1-255"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Predictions based on features\n",
"Test models both on the 8 class and the two-class classification problems. Grid search for parameters for 2 and and 8 class.\n",
"Grid search for parameters for both 8-class and the 2-class classification problems\n",
"\n",
"This is based off of http://ieeexplore.ieee.org.proxy.lib.uwaterloo.ca/stamp/stamp.jsp?arnumber=7312934, where they use a five-fold cross validation with the 5 train and test sets already defined by them. The hyperparameter tuning is done using a random subset of the training set (without dividing by patient)\n",
"\n",
Expand All @@ -19,6 +19,44 @@
"Another method is to extract patches from the original image to train the neural net on. For testing, the predictions of multiple patches can be averaged."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import glob\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt \n",
"from matplotlib.pyplot import imshow\n",
"from IPython.display import display, HTML\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"% matplotlib inline\n",
"\n",
"\n",
"# Import modules every time you run code imported using %aimport\n",
"%load_ext autoreload\n",
"%autoreload 1\n",
"\n",
"# Add the src directory for functions\n",
"src_dir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'src')\n",
"print(src_dir)\n",
"sys.path.append(src_dir)\n",
"\n",
"# import my functions:\n",
"%aimport models\n",
"from models import*\n",
"\n",
"# Base Directory where data is stored\n",
"base_data_dir = '/Users/rb/Documents/waterloo/projects/breakHis/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
Expand Down Expand Up @@ -291,25 +329,14 @@
}
],
"source": [
"import os\n",
"import sys\n",
"import glob\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt \n",
"from matplotlib.pyplot import imshow\n",
"from IPython.display import display, HTML\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"% matplotlib inline\n",
"\n",
"sys.path.insert(0, '/Users/rb/Google_Drive/Waterloo/projects/breakHis/src')\n",
"from models import*\n",
"\n",
"# load the train data\n",
"train_features = np.load('/Users/rb/Documents/waterloo/projects/breakHis/features/vgg/fold1/100/train/train_feat_vgg_100_aug1.npy')\n",
"train_dir = os.path.join(base_data_dir, 'features/vgg/fold1/100/train/train_feat_vgg_100_aug1.npy')\n",
"train_features = np.load(train_dir)\n",
"print('train_features.shape', train_features.shape)\n",
"\n",
"y_train = train_features[:,:8]\n",
Expand All @@ -326,7 +353,8 @@
" y_bin_train[index, 1] = 1\n",
" \n",
"# load the valid data\n",
"valid_features = np.load('/Users/rb/Documents/waterloo/projects/breakHis/features/vgg/fold1/100/valid/valid_feat_vgg_100_aug1.npy')\n",
"valid_dir = os.path.join(base_data_dir, 'features/vgg/fold1/100/valid/valid_feat_vgg_100_aug1.npy')\n",
"valid_features = np.load(valid_dir)\n",
"print('valid_features.shape', valid_features.shape)\n",
"y_valid = valid_features[:,:8]\n",
"x_valid = valid_features[:,8:]\n",
Expand Down
4 changes: 1 addition & 3 deletions notebooks/explore/baseline_feature_extraction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@
"out_loc = os.path.join(base_data_dir, 'features', 'vgg') \n",
"size = 100\n",
"\n",
"# be lazy and do 1:\n",
"n_folds = 6\n",
"\n",
"for i in range(1, n_folds, 1):\n",
Expand Down Expand Up @@ -130,7 +129,6 @@
"\n",
" all_features = get_freatures_vgg(generator, cur_loc, samples=8, classes=8, batch_size=1)\n",
" np.save(os.path.join(new_loc, new_dir+'_feat_vgg_'+str(size)+'_aug1.npy'), all_features)\n",
"\n",
" \n",
"for i in range(1, n_folds, 1):\n",
" new_dir = 'valid'\n",
Expand Down Expand Up @@ -161,7 +159,7 @@
"\n",
" all_features = get_freatures_vgg(generator, cur_loc, samples=8, classes=8, batch_size=1)\n",
" np.save(os.path.join(new_loc, new_dir+'_feat_vgg_'+str(size)+'_aug1.npy'), all_features)\n",
" \n",
"\n",
" \n",
"for i in range(1, n_folds, 1):\n",
" new_dir = 'test'\n",
Expand Down
200 changes: 200 additions & 0 deletions notebooks/final/test_vgg_features_cv.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cross Validation for models on VGG features \n",
"* Using the folds defined in http://ieeexplore.ieee.org.proxy.lib.uwaterloo.ca/stamp/stamp.jsp?arnumber=7312934\n",
"* For binary and 8-class\n",
"* Using best hyperparameters found in VGG_features_hyperparameter search\n",
"* Training is done using the full training set, with no validation set.\n",
"* This was re-run on a newly created dataset compared to the hyperparameter search, because it got overly high accuracy for fold 1."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"/home/rbbidart/breakHis/src\n"
]
}
],
"source": [
"import os\n",
"import sys\n",
"import glob\n",
"import random\n",
"import numpy as np\n",
"import pandas as pd\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt \n",
"from matplotlib.pyplot import imshow\n",
"from IPython.display import display, HTML\n",
"from sklearn.metrics import accuracy_score\n",
"% matplotlib inline\n",
"\n",
"\n",
"# Import modules every time you run code imported using %aimport\n",
"%load_ext autoreload\n",
"%autoreload 1\n",
"\n",
"# Add the src directory for functions\n",
"src_dir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'src')\n",
"print(src_dir)\n",
"sys.path.append(src_dir)\n",
"\n",
"# import my functions:\n",
"%aimport models\n",
"from models import*\n",
"%aimport functions\n",
"from functions import*\n",
"\n",
"# Base Directory where data is stored\n",
"base_data_dir = '/home/rbbidart/project/rbbidart/breakHis/'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Logistic Regression\n",
"* Binary C=.1\n",
"* 8-class C=1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fold 0\n"
]
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"model_2 = LogisticRegression(C=.1)\n",
"model_8 = LogisticRegression(C=1)\n",
"cv_features(model_2, model_8, base_data_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest\n",
"* Binary 160, 3\n",
"* 8-class 160, 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fold 0\n",
"Fold 1\n",
"Fold 2\n",
"Fold 3\n",
"Fold 4\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"model_2 = RandomForestClassifier(n_estimators=160, min_samples_split=3)\n",
"model_8 = RandomForestClassifier(n_estimators=160, min_samples_split=3)\n",
"cv_features(model_2, model_8, base_data_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## XGBoost\n",
"* Binary: n_estimators=200\tmax_depth=8\tlearning_rate=0.3\treg_lambda=2\n",
"* 8-class: n_estimators=250 max_depth=9 learning_rate=0.3\treg_lambda=2 (guess; NOTE: the code cell below currently passes n_estimators=200, max_depth=8 for the 8-class model as well)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from xgboost import XGBClassifier\n",
"\n",
"model_2 = XGBClassifier(n_estimators=200, max_depth=8, learning_rate=0.3, reg_lambda=2)\n",
"model_8 = XGBClassifier(n_estimators=200, max_depth=8, learning_rate=0.3, reg_lambda=2)\n",
"cv_features(model_2, model_8, base_data_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## KNN\n",
"* 9 neighbours"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"model_2 = KNeighborsClassifier(n_neighbors=9)\n",
"model_8 = KNeighborsClassifier(n_neighbors=9)\n",
"cv_features(model_2, model_8, base_data_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file removed src/._train_models_k.py
Binary file not shown.
Binary file added src/__pycache__/functions.cpython-35.pyc
Binary file not shown.
Binary file added src/__pycache__/models.cpython-35.pyc
Binary file not shown.
Loading

0 comments on commit 5d904ec

Please sign in to comment.