forked from HealthCatalyst/healthcareai-py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_regression_1.py
62 lines (45 loc) · 2.17 KB
/
example_regression_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""Creates and compares regression models using sample clinical data.
Please use this example to learn about healthcareai before moving on to the next example.
If you have not installed healthcare.ai, refer to the instructions here:
http://healthcareai-py.readthedocs.io
To run this example:
python3 example_regression_1.py
This code uses the DiabetesClinicalSampleData.csv source file.
"""
import pandas as pd
from healthcareai.supvervised_model_trainer import SupervisedModelTrainer
def main():
    """Train and compare two regression models on the diabetes sample data.

    Steps performed, in order:

    1. Read the sample CSV into a pandas dataframe.
    2. Drop the ``PatientID`` column.
    3. Build a ``SupervisedModelTrainer`` configured for regression with
       ``SystolicBPNBR`` as the predicted column.
    4. Print the first rows of the trainer's ``clean_dataframe``.
    5. Train a linear regression and a random forest regression.
    6. Save the trained linear model.
    """
    # The path is relative, so it is resolved against the current working
    # directory. The literal string 'None' in the CSV is read as missing.
    csv_path = 'healthcareai/tests/fixtures/DiabetesClinicalSampleData.csv'
    clinical_data = pd.read_csv(csv_path, na_values=['None'])

    # Alternative data source: uncomment the lines below to pull the data
    # from an MSSQL server instead of the CSV.
    # NOTE(review): `hcaidb` is not imported by this file's import lines;
    # the matching import must be added before enabling this.
    # NOTE(review): the query filters on ThirtyDayReadmitFLG, while this
    # example predicts SystolicBPNBR - confirm the intended target filter.
    # server = 'localhost'
    # database = 'SAM'
    # query = """SELECT *
    #             FROM [SAM].[dbo].[DiabetesClincialSampleData]
    #             -- In this step, just grab rows that have a target
    #             WHERE ThirtyDayReadmitFLG is not null"""
    #
    # engine = hcaidb.build_mssql_engine(server=server, database=database)
    # clinical_data = pd.read_sql(query, engine)

    # Remove columns that won't help machine learning.
    clinical_data = clinical_data.drop(['PatientID'], axis=1)

    # Step 1: set up the healthcareai regression trainer on the dataframe.
    # See the SupervisedModelTrainer documentation for the exact meaning of
    # the `impute` and `verbose` flags.
    trainer = SupervisedModelTrainer(
        dataframe=clinical_data,
        predicted_column='SystolicBPNBR',
        model_type='regression',
        grain_column='PatientEncounterID',
        impute=True,
        verbose=False,
    )

    # Show the first few rows of the dataframe held by the trainer.
    print('\n\n-------------------[ Cleaned Dataframe ]--------------------------')
    cleaned_preview = trainer.clean_dataframe.head()
    print(cleaned_preview)

    # Step 2: train the models. The linear model is trained first, then the
    # random forest; the random forest result is kept in a local but is not
    # used further in this example.
    linear_model = trainer.linear_regression()
    random_forest_model = trainer.random_forest_regression()

    # Once you are happy with a trained model, save it. This example saves
    # the linear regression model.
    linear_model.save()
# Run the example only when this file is executed as a script
# (python3 example_regression_1.py), not when it is imported.
if __name__ == "__main__":
    main()