diff --git a/README.md b/README.md index c78ac91..7e6e1f0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Machine Learning Quality and Maturity Package -This repository houses the Machine Learning (ML) Quality and Maturity Framework package developed at Booking.com.\ +This repository houses the Machine Learning (ML) Quality and Maturity Framework package.\ The structure of the repository is delineated as follows: * The primary class, located in `assessments.py`, serves the function of generating the final quality and maturity score.\ diff --git a/ml_quality/constants.py b/ml_quality/constants.py index d1cf5c5..30655f9 100644 --- a/ml_quality/constants.py +++ b/ml_quality/constants.py @@ -14,7 +14,7 @@ poc="proof of concept", production_non_critical="production non-critical", production_critical="production critical" ) -SUB_EXPLANATION_URL = "https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#" +SUB_EXPLANATION_URL = "" MATURITY_LEVELS_URL = "" @@ -35,7 +35,7 @@ class Gap(MultiValueEnum): PLOTS_FOLDER = "ml_quality_plots" ALL_MODELS_SUMMARY_FOLDER = "all_models_summary" RADAR_CHART_NAME_SUFFIX = "radar_chart" -GIT_REPO_URL = "https://github.com/bookingcom/ml_quality_maturity_framework" +GIT_REPO_URL = "" ASSESSMENTS_URL = f"{GIT_REPO_URL}/ml_quality/assessments" ALLOWED_IMAGE_FORMATS = ["png", "jpeg", "jpg"] DATE_FORMAT = "%Y-%m-%d" diff --git a/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_2/gaps.csv b/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_2/gaps.csv index 60ad1f4..86d2295 100644 --- a/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_2/gaps.csv +++ b/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_2/gaps.csv @@ -1,26 +1,26 @@ sub_characteristic,gap_value,url,reasoning,team_name,business_criticality,model_family 
-accuracy,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#accuracy,"There is no comparison with a simple baseline, neither input data validation.",team_B,poc,model_family_b -effectiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#effectiveness,There is no full-on experiment associated with the model.,team_B,poc,model_family_b -responsiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#responsiveness,Latency/throughput requirements not known,team_B,poc,model_family_b -usability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#usability,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_B,poc,model_family_b -cost_effectiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#cost-effectiveness,There is no full-on experiment associated with the model.,team_B,poc,model_family_b -efficiency,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#efficiency,"",Tech Content Agency,poc,model_family_b -availability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#availability,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_B,poc,model_family_b -resilience,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#resilience,There are more than 5 failures per quarter.,team_B,poc,model_family_b -adaptability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#adaptability,The model cannot adapt to changes in the environment.,team_B,poc,model_family_b -scalability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#scalability,"The model is deployed in a discoverable serving system (e.g. 
AWS, etc)",team_B,poc,model_family_b -maintainability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#maintainability,The model's source code is not versioned or repository link has not been provided.,team_B,poc,model_family_b -modularity,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#modularity,The model's code is non-modular or modularity has not been assessed.,team_B,poc,model_family_b -testability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#testability,The source code is not unit-tested.,team_B,poc,model_family_b -repeatability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#repeatability,Repeating the ML lifecycle is completely manual.,team_B,poc,model_family_b -operability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#operability,"The model is deployed in a discoverable serving system (e.g. AWS, etc)"team_B,poc,model_family_b -monitoring,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#monitoring,"There is no monitoring of ML performance, features, inputs, business metrics.",team_B,poc,model_family_b -discoverability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#discoverability,"The model is deployed in a discoverable serving system (e.g. 
AWS, etc).",team_B,poc,model_family_b -traceability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#traceability,Metadata and artifacts are not being logged.,team_B,poc,model_family_b -understandability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#understandability,No documentation of the ML system has been provided.,team_B,poc,model_family_b -explainability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#explainability,It is not possible to explain the model predictions.,team_B,poc,model_family_b -fairness,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#fairness,Fairness requirement to be determined by the Risk Assessment Questionnaire. No requirement for now.,team_B,poc,model_family_b -ownership,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#ownership,The model is owned by Team B.,team_B,poc,model_family_b -standards_compliance,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#standards-compliance,Applicable standards are not known or the model does not comply with them.,team_B,poc,model_family_b -vulnerability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#vulnerability,"",team_B,poc,model_family_b -readability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#readability,"Readability is not assessed, or the code is not easily readable, functions/variables have non-human readable names.",team_B,poc,model_family_b +accuracy,large,,"There is no comparison with a simple baseline, neither input data validation.",team_B,poc,model_family_b +effectiveness,large,,There is no full-on experiment associated with the model.,team_B,poc,model_family_b +responsiveness,large,,Latency/throughput requirements not 
known,team_B,poc,model_family_b +usability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_B,poc,model_family_b +cost_effectiveness,large,,There is no full-on experiment associated with the model.,team_B,poc,model_family_b +efficiency,large,,"",Tech Content Agency,poc,model_family_b +availability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_B,poc,model_family_b +resilience,large,,There are more than 5 failures per quarter.,team_B,poc,model_family_b +adaptability,large,,The model cannot adapt to changes in the environment.,team_B,poc,model_family_b +scalability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_B,poc,model_family_b +maintainability,large,,The model's source code is not versioned or repository link has not been provided.,team_B,poc,model_family_b +modularity,large,,The model's code is non-modular or modularity has not been assessed.,team_B,poc,model_family_b +testability,large,,The source code is not unit-tested.,team_B,poc,model_family_b +repeatability,large,,Repeating the ML lifecycle is completely manual.,team_B,poc,model_family_b +operability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)"team_B,poc,model_family_b +monitoring,large,,"There is no monitoring of ML performance, features, inputs, business metrics.",team_B,poc,model_family_b +discoverability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc).",team_B,poc,model_family_b +traceability,large,,Metadata and artifacts are not being logged.,team_B,poc,model_family_b +understandability,large,,No documentation of the ML system has been provided.,team_B,poc,model_family_b +explainability,large,,It is not possible to explain the model predictions.,team_B,poc,model_family_b +fairness,no,,Fairness requirement to be determined by the Risk Assessment Questionnaire. 
No requirement for now.,team_B,poc,model_family_b +ownership,no,,The model is owned by Team B.,team_B,poc,model_family_b +standards_compliance,large,,Applicable standards are not known or the model does not comply with them.,team_B,poc,model_family_b +vulnerability,large,,"",team_B,poc,model_family_b +readability,large,,"Readability is not assessed, or the code is not easily readable, functions/variables have non-human readable names.",team_B,poc,model_family_b diff --git a/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_3/gaps.csv b/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_3/gaps.csv index d7288b4..abd0614 100644 --- a/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_3/gaps.csv +++ b/ml_quality/ml_quality/tests/test_data/test_inputs/inputs/gaps/model_name=test_model_3/gaps.csv @@ -1,26 +1,26 @@ sub_characteristic,gap_value,url,reasoning,team_name,business_criticality,model_family -accuracy,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#accuracy,"There is no comparison with a simple baseline, neither input data validation.",team_A,poc,model_family_name -effectiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#effectiveness,There is no full-on experiment associated with the model.,team_A,poc,model_family_name -responsiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#responsiveness,Latency/throughput requirements not known,team_A,poc,model_family_name -usability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#usability,"The model is deployed in a discoverable serving system (e.g. 
AWS, etc)",team_A,poc,model_family_name -cost_effectiveness,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#cost-effectiveness,There is no full-on experiment associated with the model.,team_A,poc,model_family_name -efficiency,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#efficiency,"",team_A,poc,model_family_name -availability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#availability,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name -resilience,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#resilience,There are more than 5 failures per quarter.,team_A,poc,model_family_name -adaptability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#adaptability,The model cannot adapt to changes in the environment.,team_A,poc,model_family_name -scalability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#scalability,"The model is deployed in a discoverable serving system (e.g. 
AWS, etc)",team_A,poc,model_family_name -maintainability,small,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#maintainability,"The code is versioned in Git, but its readability has room for improvement.",team_A,poc,model_family_name -modularity,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#modularity,The model's code is non-modular or modularity has not been assessed.,team_A,poc,model_family_name -testability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#testability,The source code is not unit-tested.,team_A,poc,model_family_name -repeatability,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#repeatability,Repeating the ML lifecycle is completely manual.,team_A,poc,model_family_name -operability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#operability,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name -monitoring,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#monitoring,"There is no monitoring of ML performance, features, inputs, business metrics.",team_A,poc,model_family_name -discoverability,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#discoverability,"The model is deployed in a discoverable serving system (e.g. 
AWS, etc)",team_A,poc,model_family_name -traceability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#traceability,Metadata and artifacts are not being logged.,team_A,poc,model_family_name -understandability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#understandability,No documentation of the ML system has been provided.,team_A,poc,model_family_name -explainability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#explainability,It is not possible to explain the model predictions.,team_A,poc,model_family_name -fairness,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#fairness,Fairness requirement to be determined by the Risk Assessment Questionnaire. No requirement for now.,team_A,poc,model_family_name -ownership,no,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#ownership,The model is owned by team A.,team_A,poc,model_family_name -standards_compliance,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#standards-compliance,Applicable standards are not known or the model does not comply with them.,team_A,poc,model_family_name -vulnerability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#vulnerability,"",team_A,poc,model_family_name -readability,large,https://github.com/bookingcom/ml-quality-model/blob/master/docs/subcharacteristics.md#readability,"Readability is not assessed, or the code is not easily readable, functions/variables have non-human readable names.",team_A,poc,model_family_name +accuracy,large,,"There is no comparison with a simple baseline, neither input data validation.",team_A,poc,model_family_name +effectiveness,large,,There is no full-on experiment associated with the model.,team_A,poc,model_family_name +responsiveness,large,,Latency/throughput 
requirements not known,team_A,poc,model_family_name +usability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name +cost_effectiveness,large,,There is no full-on experiment associated with the model.,team_A,poc,model_family_name +efficiency,large,,"",team_A,poc,model_family_name +availability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name +resilience,large,,There are more than 5 failures per quarter.,team_A,poc,model_family_name +adaptability,large,,The model cannot adapt to changes in the environment.,team_A,poc,model_family_name +scalability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name +maintainability,small,,"The code is versioned in Git, but its readability has room for improvement.",team_A,poc,model_family_name +modularity,large,,The model's code is non-modular or modularity has not been assessed.,team_A,poc,model_family_name +testability,large,,The source code is not unit-tested.,team_A,poc,model_family_name +repeatability,,Repeating the ML lifecycle is completely manual.,team_A,poc,model_family_name +operability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name +monitoring,large,,"There is no monitoring of ML performance, features, inputs, business metrics.",team_A,poc,model_family_name +discoverability,no,,"The model is deployed in a discoverable serving system (e.g. AWS, etc)",team_A,poc,model_family_name +traceability,large,,Metadata and artifacts are not being logged.,team_A,poc,model_family_name +understandability,large,,No documentation of the ML system has been provided.,team_A,poc,model_family_name +explainability,large,,It is not possible to explain the model predictions.,team_A,poc,model_family_name +fairness,no,,Fairness requirement to be determined by the Risk Assessment Questionnaire. 
No requirement for now.,team_A,poc,model_family_name +ownership,no,,The model is owned by team A.,team_A,poc,model_family_name +standards_compliance,large,,Applicable standards are not known or the model does not comply with them.,team_A,poc,model_family_name +vulnerability,large,,"",team_A,poc,model_family_name +readability,large,,"Readability is not assessed, or the code is not easily readable, functions/variables have non-human readable names.",team_A,poc,model_family_name diff --git a/quality_assessment_form.md b/quality_assessment_form.md index d96f216..08cf6cb 100644 --- a/quality_assessment_form.md +++ b/quality_assessment_form.md @@ -23,7 +23,7 @@ This file contains the questions used to initially assessed the ML models techin 9. Are there other teams or departments besides yours relying on this model? 10. What might be the consequences of disabling the model in production? Does this pose an existential risk to the - Booking.com business? + business? 11. Is the model being tested in an experiment (provide relevant link)? diff --git a/setup.py b/setup.py index d932b4e..ab0e58c 100644 --- a/setup.py +++ b/setup.py @@ -12,8 +12,8 @@ setuptools.setup( name='ml_quality', version=version, - author=["Antonio Castelli", "George Chouliaras"], - author_email=["antonio.castelli@booking.com", "georgios.chouliaras@booking.com"], + author=["anonymous author"], + author_email=[""], description="Python tools to perform assessment on quality of ML systems", long_description=long_description, long_description_content_type="text/markdown", @@ -21,7 +21,6 @@ packages=setuptools.find_packages(exclude=["tests"]), classifiers=[ "Programming Language :: Python :: 3", - "License :: All Rights Reserved by Booking.com", "Operating System :: OS Independent", ], install_requires=["plotly>=5.5.0", "kaleido", "pandas", "numpy", "aenum", "pdfkit"],