-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_result.py
70 lines (55 loc) · 2.16 KB
/
analysis_result.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
# Read the CSV and rename its columns positionally. The assignment to
# `data.columns` requires the file to contain exactly five columns; pandas
# raises ValueError otherwise.
# NOTE(review): read_csv treats the first row as a header by default, so the
# original header names are discarded here -- confirm the file has a header row.
csv_path = '/mnt/data/each_slide_patch_num_label_vitb.csv'
column_names = ['data1', 'data2', 'data3', 'data4', 'label']
data = pd.read_csv(csv_path)
data.columns = column_names
# Summary statistics (count, mean, std, min, quartiles, max) per column.
desc_stats = data.describe()
# Bar chart of every statistic except the row count, one bar group per
# statistic and one bar per column.
stats_ax = desc_stats.drop(index=['count']).plot.bar(
    figsize=(15, 7),
    title="Descriptive Statistics",
)
stats_ax.set_ylabel('Value')
plt.show()
# Pairwise correlation (pandas default: Pearson) between every column of
# `data`, including `label`.
# NOTE(review): with pandas >= 2.0 this raises if any column is non-numeric --
# confirm `label` is numeric.
correlation_matrix = data.corr()
# Annotated heatmap; the colour scale is pinned to the full [-1, 1] range so
# that zero correlation sits at the centre of the diverging palette.
plt.figure(figsize=(10, 8))
corr_ax = sns.heatmap(
    correlation_matrix,
    vmin=-1,
    vmax=1,
    cmap='coolwarm',
    annot=True,
)
corr_ax.set_title('Correlation Heatmap')
plt.show()
# Feature matrix (the four data columns) and target vector (the label column).
X = data[['data1', 'data2', 'data3', 'data4']]
y = data['label']
# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is not stratified, so class proportions may differ
# between train and test -- consider stratify=y if the labels are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Logistic regression with a raised iteration cap (scikit-learn's default is
# 100) so the solver has room to converge.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
# Model evaluation on the held-out test set.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# Report the metrics: previously they were computed but never shown, so a
# plain script run gave no numeric evaluation output.
print(f"Accuracy: {accuracy:.4f}")
print("Classification report:")
print(report)
# Draw the confusion matrix as an annotated heatmap. fmt='g' prints the cell
# counts in general number format rather than seaborn's default '.2g'.
plt.figure(figsize=(8, 6))
cm_ax = sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues')
# Rows are true labels and columns are predicted labels, matching the layout
# returned by sklearn's confusion_matrix.
cm_ax.set(
    xlabel='Predicted Label',
    ylabel='True Label',
    title='Confusion Matrix',
)
plt.show()
# One histogram (with a KDE overlay) per feature, laid out on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
fig.suptitle('Distribution of Features', fontsize=16)
# axes.flat walks the grid row by row, so data1..data4 land at
# [0,0], [0,1], [1,0], [1,1] respectively.
for feature, ax in zip(['data1', 'data2', 'data3', 'data4'], axes.flat):
    sns.histplot(data=data, x=feature, kde=True, ax=ax)
    ax.set_title(f'Distribution of {feature}')
plt.tight_layout()
# Leave headroom at the top so the figure-level title does not overlap the
# upper row of subplot titles.
plt.subplots_adjust(top=0.9)
plt.show()