From d5d71704ec6e7c131621aa35e4bab98d83539ab0 Mon Sep 17 00:00:00 2001 From: fchouteau Date: Tue, 12 Nov 2024 23:57:05 +0100 Subject: [PATCH] Deployed 76dd324 with MkDocs version: 1.5.3 --- 1_5_be.html | 2 +- 1_8_deployment_tp.html | 133 ++++++++++++++---- search/search_index.json | 2 +- sitemap.xml | 58 ++++---- sitemap.xml.gz | Bin 463 -> 463 bytes .../fonts/league-gothic/league-gothic.eot | Bin .../fonts/league-gothic/league-gothic.ttf | Bin .../fonts/league-gothic/league-gothic.woff | Bin .../source-sans-pro-italic.eot | Bin .../source-sans-pro-italic.ttf | Bin .../source-sans-pro-italic.woff | Bin .../source-sans-pro-regular.eot | Bin .../source-sans-pro-regular.ttf | Bin .../source-sans-pro-regular.woff | Bin .../source-sans-pro-semibold.eot | Bin .../source-sans-pro-semibold.ttf | Bin .../source-sans-pro-semibold.woff | Bin .../source-sans-pro-semibolditalic.eot | Bin .../source-sans-pro-semibolditalic.ttf | Bin .../source-sans-pro-semibolditalic.woff | Bin slides/plugin/markdown/plugin.js | 0 slides/plugin/math/katex.js | 0 slides/static/.DS_Store | Bin 8196 -> 0 bytes slides/static/img/.DS_Store | Bin 6148 -> 0 bytes slides/static/img/ads_logo.jpg | Bin slides/static/img/docker_logo.png | Bin slides/static/img/docker_pratique.png | Bin slides/static/img/docker_workflow.png | Bin slides/static/img/ec2.png | Bin slides/static/img/meme_1.jpg | Bin slides/static/img/pet-vs-cattle-2.png | Bin slides/static/img/pet-vs-cattle.png | Bin slides/static/img/scaleway_logo_2018.png | Bin 33 files changed, 138 insertions(+), 57 deletions(-) mode change 100755 => 100644 slides/dist/theme/fonts/league-gothic/league-gothic.eot mode change 100755 => 100644 slides/dist/theme/fonts/league-gothic/league-gothic.ttf mode change 100755 => 100644 slides/dist/theme/fonts/league-gothic/league-gothic.woff mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.eot mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.ttf mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.woff mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.eot mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.ttf mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.woff mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.eot mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.ttf mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.woff mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.eot mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.ttf mode change 100755 => 100644 slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.woff mode change 100755 => 100644 slides/plugin/markdown/plugin.js mode change 100755 => 100644 slides/plugin/math/katex.js delete mode 100644 slides/static/.DS_Store delete mode 100644 slides/static/img/.DS_Store mode change 100755 => 100644 slides/static/img/ads_logo.jpg mode change 100755 => 100644 slides/static/img/docker_logo.png mode change 100755 => 100644 slides/static/img/docker_pratique.png mode change 100755 => 100644 slides/static/img/docker_workflow.png mode change 100755 => 100644 slides/static/img/ec2.png mode change 100755 => 
100644 slides/static/img/meme_1.jpg mode change 100755 => 100644 slides/static/img/pet-vs-cattle-2.png mode change 100755 => 100644 slides/static/img/pet-vs-cattle.png mode change 100755 => 100644 slides/static/img/scaleway_logo_2018.png diff --git a/1_5_be.html b/1_5_be.html index 6635a26..09aec24 100644 --- a/1_5_be.html +++ b/1_5_be.html @@ -1991,7 +1991,7 @@

1.1.3 - Build your docker image
  • Refer to your previous work to build the image
  • Danger

    diff --git a/1_8_deployment_tp.html b/1_8_deployment_tp.html index 64db404..cd685a7 100644 --- a/1_8_deployment_tp.html +++ b/1_8_deployment_tp.html @@ -957,9 +957,9 @@
  • 2 - Starting the GitHub Codespace
  • 9. BONUS - Scaling up
@@ -1460,20 +1478,27 @@

    Deploy your ML model into production🔗

Objectives🔗


The goal of this TP is to turn this notebook into two containerized services:

  • a back-end, a server that receives images and returns predictions,
  • a front-end that lets you send images to the model and display the predictions on those images.

To save time, the dockerfiles have already been built and are ready to be tested and deployed. If you want to go into the details and write the code yourself, you can consult the long version of this TP below (which is not up to date).

We will therefore cover:

  • building a "backend" docker image that serves the model behind an "API"
  • interacting with this container
  • building a "frontend" docker image that provides a GUI to interact more easily with the backend
  • docker-compose to run multi-container applications
  • deploying the backend on GCP
  • the final test

We place ourselves in a "microservices" setting where the front-end and the back-end are two different containers. It would have been possible to build a single container holding both (a "monolith"). A microservices architecture has some advantages (modularity, maintainability) but is more complex to put in place.

1 - Setting up the Google Cloud Platform project🔗

Now that you have your credits, follow the instructions of the first Google Cloud Platform hands-on to create your own GCP project.

Select your personal Google Cloud Platform project.

2 - Starting the GitHub Codespace🔗

If you have already started a GitHub Codespace previously, you can restart it from the usual interface.

Otherwise, start a github codespace from the repository https://github.com/fchouteau/isae-cloud-computing-codespace

You must use a codespace created from this repository, because it contains everything you need for this TP.

codespace

Normally, once the codespace is up, you should get a vscode interface with two folders, one of which is named tp-deployment. Go to that folder.

    @@ -1481,7 +1506,7 @@

3 - Building and testing the backend🔗

The README.md in the backend folder gives details on how the server and its API are built (this used to be left as an exercise). We use FastAPI, which is a framework for building web applications.

The main code lives in app.py. We declare "routes" (ways of interacting with the server) and then assign functions to them.

For example, you can look at the /predict route, which is associated with the function of the same name.

    @app.post(
         "/predict",
         description="Send a base64 encoded image + the model name, get detections",
@@ -1490,20 +1515,25 @@ 3 - Building and testing the backend
     )

This function runs inference on the image provided via the REST request to the /predict route.

To better illustrate how you can interact with this server, we are going to run it locally, using the docker image that has already been built.

Note

You can rebuild the docker image yourself by running

docker build -f Dockerfile -t eu.gcr.io/third-ridge-138414/yolo-v5:1.2 .

Run the following command: docker run --rm -p 8000:8000 eu.gcr.io/third-ridge-138414/yolo-v5:1.2

This starts a container from the backend docker image, exposing port 8000.

Connect to port 8000 of the codespace. You should see an almost blank page containing "YOLO-V5 WebApp created with FastAPI"
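Note: as a quick sanity check, you can also query the server from a second terminal in the codespace. Assuming the root route / is the one serving that message (a sketch, not part of the official instructions):

    curl http://localhost:8000/

This should print the same text.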

We will now look at the application's documentation. It is generated automatically from the code of app.py by the FastAPI framework and is available on the /docs route. For more information, see here.

So connect to the /docs route by appending it to the codespace URL.

    fastapidoc

This web page describes the available routes and how to interact with them, as well as the input and output formats. This is the API documentation, and when you interact with the server it is the only thing you need. The API documentation follows a standardized format.

We are now going to interact with this server.

In the backend folder there is a python file, test_webapp.py. It automatically sends the right requests to the server. Run it (python test_webapp.py); you should see the tests corresponding to the code, as well as the predictions of the cats on the image cats.png.
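To give a rough idea of what such a request looks like, here is a hedged sketch of a call to /predict; the exact JSON field names are assumptions (the real request schema is defined in app.py and visible on the /docs page):

    import base64

    import requests

    # read and base64-encode the test image; the "model" and "image" field names are assumptions,
    # check test_webapp.py or the /docs page for the real schema
    with open("cats.png", "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode("utf-8")

    # send the encoded image plus a model name to the /predict route and print the detections
    response = requests.post(
        "http://localhost:8000/predict",
        json={"model": "yolov5s", "image": image_b64},
    )
    print(response.json())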

Leave the terminal with the running container open for now.

4 - Building and testing the frontend🔗

As you will have noticed, interacting with the backend through scripts is not very intuitive; we would like to visualize the predictions more easily, apply thresholds on the objects' confidence scores, etc.

For this we are going to use a streamlit application (note: for an introduction to streamlit, see section 6 of the BE).

In your codespace, open a new terminal and go to the frontend folder. Here again, the file app.py contains the code of the streamlit application. It takes an image that you upload (any image of your choice) and sends it to the server whose IP you specify in the box at the top left.

Let's launch this application:

    docker run --rm -p 8501:8501 --network="host" eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5
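The topic list above mentions docker-compose for multi-container applications; its dedicated section is elided from this diff. As a hedged sketch only (the service names and wiring are assumptions, not the TP's official file), a compose file for these two images could look like:

    version: "3"
    services:
      backend:
        image: eu.gcr.io/third-ridge-138414/yolo-v5:1.2
        ports:
          - "8000:8000"
      frontend:
        image: eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5
        ports:
          - "8501:8501"
        depends_on:
          - backend

With such a file, docker compose up would start both containers, and the frontend could reach the backend at http://backend:8000 through compose's default network.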

    @@ -1538,9 +1568,9 @@

5 - docker-compose

6 - Deploying the backend on a Google Compute Engine VM🔗

We are now going to start a Google Compute Engine VM instance and deploy a container directly on it. You have already seen this method in the streamlit section of the BE.

Don't forget to connect your github codespace to your gcp project using gcloud init
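Note: gcloud init is interactive; if the SDK is already authenticated, a shortcut (a sketch, with {your-project-id} as a placeholder for your own project id) is to point it at your project directly:

    gcloud auth login
    gcloud config set project {your-project-id}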

Retrieve your gcp project_id, either from the web interface or with the following variable: PROJECT_ID=$(gcloud config get-value project 2> /dev/null)

Then we will directly create a VM with a container deployed on it. Note that this time we use an OS dedicated to hosting containers (not meant to be connected to over ssh) rather than ubuntu as before.

    gcloud compute instances create-with-container fch-yolo-backend \
         --project=${PROJECT_ID} \
    @@ -1559,15 +1589,66 @@ 

7 - Tests

8. Yay!🔗

    Success

🍾 And there you go, you have deployed your first model on the cloud

Warning

Don't forget to delete your GCE VM once the work is done

9. BONUS - Scaling up🔗

We have just deployed a model on a single machine.

Our deployment is still missing a few things:

  • a domain name
  • the ability to scale out over several machines, or to scale down to zero machines when there is no demand
  • update management: how do we deploy a new version of the application?
  • routing of the traffic to the right instance

We are therefore going to look at a "managed" container deployment solution (also called serverless / "Container as a Service"): Google Cloud Run. To learn more, read the introduction to the service.

The goal is to deploy our container, which is a service, without managing the infrastructure or the routing.

gcr

We will roughly follow the steps of the tutorial

Hint

To test the scaling behaviour, it is recommended to form groups, make only one deployment, and then all try to use the same service (the same URL) once it is deployed.

  • Go to the GCR (Cloud Run) page
  • Select "deploy a container"
  • Enter the URL of the container to deploy: eu.gcr.io/third-ridge-138414/yolo-v5:1.2
  • Enter a service name
  • Select a europe zone (west1, west4, west9)
  • Allow unauthenticated requests
  • Ingress control: all
  • In the container settings, select port 8000 and allocate 2 GB of RAM
  • Set a 10s timeout and a maximum of 5 requests per instance
  • Set a maximum of 5 instances
  • And that's it!
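For reference, roughly the same configuration can be scripted with the gcloud CLI instead of the console; this is an approximate equivalent only (the service name is a placeholder, the flags mirror the settings above):

    gcloud run deploy yolo-v5-demo \
        --image=eu.gcr.io/third-ridge-138414/yolo-v5:1.2 \
        --region=europe-west1 \
        --allow-unauthenticated \
        --port=8000 \
        --memory=2Gi \
        --timeout=10 \
        --concurrency=5 \
        --max-instances=5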

Normally, your service gets created. Once it has started, an instance is spun up (you have no control over the infrastructure) and the prediction is reachable at the service URL.

Restart the front end from your codespace and enter the service URL. Run a prediction.

Success

🍾 And there you go, you have deployed your first model on the cloud

If several people send simultaneous requests to the service with different images, the service may "scale out" automatically.

To monitor the traffic of your service you can use:

  • either the web page of the google cloud run service,
  • or the Metrics Explorer, selecting the metric Cloud Run Revision - Container - Instance Count. You can also add this metric as a widget on the gcr dashboard...

Hint

Normally a service URL has been posted on slack, you can try it out...

Warning

Don't forget to delete your google cloud run service once the work is done
    diff --git a/search/search_index.json b/search/search_index.json index 254cabf..a382cf8 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"index.html","title":"Data Engineering","text":"

    The amount of data in the world, the form these data take, and the ways to interact with data have all increased exponentially in recent years. The extraction of useful knowledge from data has long been one of the grand challenges of computer science, and the dawn of \"big data\" has transformed the landscape of data storage, manipulation, and analysis. In this module, we will look at the tools used to store and interact with data.

    The objective of this class is that students gain:

    • First hand experience with and detailed knowledge of computing models, notably cloud computing
    • An understanding of distributed programming models and data distribution
    • Broad knowledge of many databases and their respective strengths

    As a part of the Data and Decision Sciences Master's program, this module aims specifically at providing the tool set students will use for data analysis and knowledge extraction using skills acquired in the Algorithms of Machine Learning and Digital Economy and Data Uses classes.

    "},{"location":"index.html#class-structure","title":"Class structure","text":"

    The class is structured in four parts:

    "},{"location":"index.html#data-engineering-fundamentals","title":"Data engineering fundamentals","text":"

    In this primer class, students will cover the basics of Linux command line usage, git, ssh, and data manipulation in python. The format of this class is an interactive capture-the-flag event.

    "},{"location":"index.html#data-storage","title":"Data storage","text":"

    This module covers Database Management Systems with a focus on SQL systems. For evaluation, students will install and manipulate data in PostgreSQL and MongoDB and compare the two systems.

    "},{"location":"index.html#data-computation","title":"Data computation","text":"

    A technical overview of the computing platforms used in the data ecosystem. We will briefly cover cluster computing and then go in depth on cloud computing, using Google Cloud Platform as an example. Finally, a class on GPU computing will be given in coordination with the deep learning section of the AML class.

    "},{"location":"index.html#data-distribution","title":"Data distribution","text":"

    In the final module, we cover the distribution of data, with a focus on distributed programming models. We will introduce functional programming and MapReduce, then use these concepts in a practical session on Spark. Finally, students will do a graded exercise with Dask.

    "},{"location":"0_1_databases.html","title":"Data Storage","text":"

    In this module on databases, database management systems will be covered. A basic understanding of SQL is considered as a prerequisite, and students can refer to the slides and additional resources if needed. For evaluation, students will install and explore the advantages of different DBMSs as a graded project.

    In this first class, we introduce the basics of database management systems and cover high level DBMS functionality.

    Slides

    For the next class, students should install PostgreSQL and MongoDB on their local machines.

    "},{"location":"0_1_databases.html#additional-resources","title":"Additional Resources","text":"
    • PostgreSQL documentation
    • MongoDB documentation
    • SQLBolt - SQL exercises
    • Databases introduction (fr)
    • A comprehensive overview of database systems (en)
    "},{"location":"0_2_ETL.html","title":"Extract, Transform, Load (ETL)","text":"

    In this module on ETL, we will cover the fundamental concepts and practices of data integration and processing. A basic understanding of databases and SQL is considered a prerequisite. Students can refer to the slides and additional resources if they need to refresh their knowledge. For evaluation, students will design and implement an ETL pipeline as a graded project.

    In this first class, we introduce the basics of ETL processes and cover high-level ETL functionality and tools.

    Slides

    "},{"location":"0_3_dbms.html","title":"Evolution of Data Management Systems","text":""},{"location":"0_3_dbms.html#fundamental-concepts-methods-and-applications","title":"Fundamental Concepts, Methods and Applications","text":"

    In this three part class, students will cover the history of data management systems, from file systems to databases to distributed cloud storage. This class is given over the length of the Data Engineering course. Questions from the first two parts are integrated into the exam on cloud computing, and questions from the Cloud DMS section are integrated into the Dask notebook evaluation.

    "},{"location":"0_3_dbms.html#objectives","title":"Objectives","text":"

The objectives of this course are:

  • Introduce the fundamental concepts
  • Describe, concisely, the main characteristics of the evolution of DMS (Data Management Systems)
  • Highlight targeted application classes

    "},{"location":"0_3_dbms.html#key-words","title":"Key Words","text":"

Data Management Systems, Uni-processor DBMS, Parallel DBMS, Data Integration Systems, Big Data, Cloud Data Management Systems, High Performance, Scalability, Elasticity, Multi-store/Poly-store Systems

    "},{"location":"0_3_dbms.html#targeted-skills","title":"Targeted Skills","text":"
  • Effectively exploit a DMS according to its environment (uniprocessor, parallel, distributed, cloud) to support decision-making within an organization.
  • Ability to choose, appropriately, a DMS across multiple environments so that an organization's applications run optimally
    "},{"location":"0_3_dbms.html#indicative-program","title":"Indicative Program","text":"
    1. Introduction to Main Problems of Data Management

      • From File Management Systems FMS to Database MS DBMS
      • Motivations, Objectives, Organizations & Drawbacks
      • Databases & Rel. DBMS: Motivations & Objectives
      • Resources:
        • Introduction
        • SGF - File Systems
        • Views - Relational Systems
        • File Organization
    2. Parallel Database Systems

      • Objectives and Parallel Architecture Models
      • Data Partitioning Strategies
      • Parallel Query Processing
      • Resources:
        • Parallel DBMS
        • Parallel Queries
        • Systems DB Parallel
    3. From Distributed DB to Data Integration Systems DIS

      • An Ex. of DDB, Motivations & Objectives
      • Designing of DDB
      • Distributed Query Processing
      • An Ex. of DIS
      • Motivations & Objectives
      • Mediator-Adapters Architecture
      • Design of a Global Schema (GAV, LAV)
      • Query Processing Methodologies
      • Resources:
        • Distributed DBMS - Chapter 1
        • Distributed DBMS - Chapter 2
        • Distributed DBMS - Chapter 3
        • Systems for integrating heterogeneous and distributed data
        • Integration Systems complement
        • Distributed DBMS Dec 2023
    4. Cloud Data Management Systems CDMS

      • Motivations and Objectives
      • Main Characteristics of Big Data and CDMS
      • Classification of Cloud Data Management Systems CDMS
      • Advantages and Weakness of Parallel RDBMS and CDMS
      • Comparison between Parallel RDBMS and CDMS
  • Introduction to Multi-store/Polystore Systems
      • Resources:
        • Cloud Systems
        • MapReduce examples
    5. Conclusion

      • Maturity of Cloud DMS
      • Key Criteria for Choosing a Data Management System
    "},{"location":"0_3_dbms.html#additional-reading","title":"Additional Reading","text":"
    1. Principles of Distributed Database Systems, M. Tamer Ozsu and Patrick Valduriez; Springer-Verlag ; Fourth Edition, December 2019.

    2. Data Management in the Cloud: Challenges and Opportunities Divyakant Agrawal, Sudipto Das, and Amr El Abbadi; Synthesis Lectures on Data Management, December 2012, Vol. 4, No. 6 , Pages 1-138.

    3. Query Processing in Parallel Relational Database Systems; H. Lu, B.-C Ooi and K.-L. Tan; IEEE Computer Society Press, CA, USA, 1994.

    4. Traitement parall\u00e8le dans les bases de donn\u00e9es relationnelles : concepts, m\u00e9thodes et applications Abdelkader Hameurlain, Pierre Bazex, Franck Morvan; C\u00e9padu\u00e8s Editions, Octobre 1996.

    "},{"location":"0_3_postgres.html","title":"PostgeSQL","text":"

    In this practical session, we cover many examples of database queries with the popular DBMS PostgreSQL.

    Based on the TP by Christophe Garion, CC BY-NC-SA 2015.

    "},{"location":"0_3_postgres.html#setup","title":"Setup","text":"

    Before class, please install PostgreSQL and pgAdmin.

    "},{"location":"0_3_postgres.html#postgresql-installation","title":"PostgreSQL installation","text":"

    For this session, students should install PostgreSQL (v9 or higher) and pgAdmin (v4). Follow the installation instructions and make sure you have an initial database setup and the postgresql service running.

    • Installation on Ubuntu
    • Installation on Mac OS
    • Installation on Arch Linux
    • Installation on Windows Subsystem for Linux
    • Installation on Windows (and add the PostgreSQL binaries to your path)

    Additionally, add your login user as a postgresql superuser to enable database creation with your user:

    # bash shell in Linux or OSX\n$ sudo su -l postgres\n[postgres]$ createuser --interactive\n
    "},{"location":"0_3_postgres.html#pgadmin","title":"pgAdmin","text":"

    You can do all exercises directly through the psql shell for this class. However, it is useful to have a graphical confirmation of the database configuration. pgAdmin is one of many front-ends for Postgres. Install it by following the instructions on the pgAdmin site.

    "},{"location":"0_3_postgres.html#setup-database-creation","title":"Setup - database creation","text":"

Once you've installed and configured PostgreSQL, create the first exercise database:

    # bash shell in Linux or OSX or windows powershell\n$ createdb db-mexico86\n

    you can also do this through an SQL shell:

    # SQL shell\npostgres=# CREATE DATABASE \"db-mexico86\";\n

    Confirm with pgAdmin that your database db-mexico86 was created. If you don't have any servers, create one by right-clicking. The host address is 127.0.0.1 and the maintenance database and username should be postgres.

    In pgAdmin, if you are asked for a password and don't know what your password is, you can reset the password of the postgres user:

    change password
postgres=# ALTER USER postgres WITH PASSWORD 'newpassword';\n
    "},{"location":"0_3_postgres.html#mexico86-database-simple-queries","title":"Mexico86 database - simple queries","text":"

    This database contains data from the 1986 football World Cup.

    You can download the database creation script individually:

    $ wget https://raw.githubusercontent.com/SupaeroDataScience/DE/master/scripts/mexico86/create-tables-std.sql\n

    Or git clone the class repository and navigate to the creation and insertion scripts.

    Once you have the scripts, run the database creation script in the mexico folder.

    # bash shell in Linux or OSX, or windows powershell\n$ psql -d db-mexico86 -f mexico86/create-tables-std.sql\n

    If that doesn't work, you can copy the script into the Query Tool in pgAdmin.

    Exercise 1.1: Look at the database creation scripts. What are the tables being created? What are their fields? Which fields are keys? Confirm these values in pgAdmin.

    Response Pays: (nom, groupe) Typematch: (type) Match: (paysl, paysv, butsl, butsv, type, date)

    You should be able to make queries now. You can either use PostgreSQL in interactive mode by running

    $ psql -d db-mexico86\n

    or write your solutions in an SQL file and run the file:

    $ echo \"SELECT groupe FROM pays;\" > a.sql\n$ psql -d db-mexico86 -f a.sql\n

    You can also use the Query Editor in pgAdmin for a graphical interface.

    Exercise 1.2: Write a query which lists the countries participating in the World Cup.

    Response
            nom \n---------------------\nArgentine\nItalie\nBulgarie\nCor\u00e9e\nMexique\nParaguay\nBelgique\nIrak\nURSS\nHongrie\nFrance\nCanada\nBr\u00e9sil\nEspagne\nIrlande du Nord\nAlg\u00e9rie\nDanemark\nRFA\nUruguay\n\u00c9cosse\nMaroc\nAngleterre\nPologne\nPortugal\n(24 rows)\n

    Exercise 1.3: Write a query which lists all matches as a pair of countries per match.

    Response
            paysl        |        paysv \n---------------------|---------------------\nBulgarie            | Italie\nArgentine           | Cor\u00e9e\nItalie              | Argentine\nCor\u00e9e               | Bulgarie\nCor\u00e9e               | Italie\nArgentine           | Bulgarie\nBelgique            | Mexique\nParaguay            | Irak\nMexique             | Paraguay\nIrak                | Belgique\nIrak                | Mexique\nParaguay            | Belgique\nCanada              | France\nURSS                | Hongrie\nFrance              | URSS\nHongrie             | Canada\nURSS                | Canada\nHongrie             | France\nEspagne             | Br\u00e9sil\nAlg\u00e9rie             | Irlande du Nord\nBr\u00e9sil              | Alg\u00e9rie\nIrlande du Nord     | Espagne\nIrlande du Nord     | Br\u00e9sil\nAlg\u00e9rie             | Espagne\nUruguay             | RFA\n\u00c9cosse              | Danemark\nDanemark            | Uruguay\nRFA                 | \u00c9cosse\n\u00c9cosse              | Uruguay\nDanemark            | RFA\nMaroc               | Pologne\nPortugal            | Angleterre\nAngleterre          | Maroc\nPologne             | Portugal\nAngleterre          | Pologne\nMaroc               | Portugal\nBr\u00e9sil              | Pologne\nFrance              | Italie\nMaroc               | RFA\nMexique             | Bulgarie\nArgentine           | Uruguay\nAngleterre          | Paraguay\nURSS                | Belgique\nEspagne             | Danemark\nBr\u00e9sil              | France\nRFA                 | Mexique\nArgentine           | Angleterre\nBelgique            | Espagne\nFrance              | RFA\nArgentine           | Belgique\nRFA                 | Argentine\n(51 rows)\n

    Exercise 1.4: Write a query which lists the matches which took place on June 5, 1986.

    Response
            paysl        |   paysv\n---------------------|-----------\nItalie              | Argentine\nCor\u00e9e               | Bulgarie\nFrance              | URSS\n(3 rows)\n

    Exercise 1.5: Write a query which lists the countries which France played against (hint, France could have played either side).

    Response
    pays\n---------\nBr\u00e9sil\nCanada\nHongrie\nItalie\nRFA\nURSS\n(6 rows)\n
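One possible query (a sketch assuming the Match columns shown in Exercise 1.1, not necessarily the official solution):
SELECT paysv AS pays FROM match WHERE paysl = 'France'\nUNION\nSELECT paysl FROM match WHERE paysv = 'France'\nORDER BY pays;\n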

    Exercise 1.6: Write a query which returns the winner of the World Cup

    Response
    pays\n-----------\nArgentine\n(1 row)\n
    "},{"location":"0_3_postgres.html#beer-database","title":"Beer database","text":"

    We'll now use a database which tracks the beers that a group of friends enjoy. Create the database and populate it using the provided scripts.

    $ createdb db-beer\n$ psql -d db-beer -f beer/create-tables-std.sql\n$ psql -d db-beer -f beer/insert.sql\n

    Exercise 2.1: Look at the database creation scripts. What are the tables being created? What are their fields? Which fields are keys? Confirm these values in pgAdmin.

    Response Frequente: (buveur, bar) Sert: (bar, biere) Aime: (buveur, biere)

    Write queries which respond to the following questions. Hint, understanding natural joins may help.

    Exercise 2.2 What is the list of bars which serve the beer that Martin likes?

    Response
            bar \n-------------------\n Ancienne Belgique\n La Tireuse\n Le Filochard\n(3 rows)\n
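One possible query (a sketch based on the Sert(bar, biere) and Aime(buveur, biere) tables from Exercise 2.1; the natural join matches rows on biere):
SELECT DISTINCT bar\nFROM sert NATURAL JOIN aime\nWHERE buveur = 'Martin'\nORDER BY bar;\n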

Exercise 2.3 What is the list of drinkers who go to at least one bar which serves a beer they like?

    Response
     buveur \n--------\n Bob\n David\n Emilie\n Martin\n(4 rows)\n

Exercise 2.4 What is the list of drinkers who don't go to any bar which serves a beer they like?

    Response
     buveur \n--------\n Cecile\n Alice\n(2 rows)\n
    "},{"location":"0_3_postgres.html#complex-queries-mexico-database","title":"Complex queries - Mexico database","text":"

    Exercise 3.1: Create a table with an entry for each match which lists the total number of goals (scored by either side), the match type, and the date. As we'll use this table later on, create a VIEW called \"matchbutsglobal\" with this information.

    Response
            paysl        |        paysv        | buts |  type  |    date \n---------------------+---------------------+------+--------+------------\n URSS                | Belgique            |    7 | 1/8    | 1986-06-15\n France              | Italie              |    2 | 1/8    | 1986-06-17\n Maroc               | Pologne             |    0 | Poule  | 1986-06-02\n RFA                 | Argentine           |    5 | Finale | 1986-06-29\n Br\u00e9sil              | France              |    2 | 1/4    | 1986-06-21\n Italie              | Argentine           |    2 | Poule  | 1986-06-05\n Maroc               | Portugal            |    4 | Poule  | 1986-06-11\n Br\u00e9sil              | Alg\u00e9rie             |    1 | Poule  | 1986-06-06\n Paraguay            | Belgique            |    4 | Poule  | 1986-06-11\n Hongrie             | France              |    3 | Poule  | 1986-06-09\n Irak                | Belgique            |    3 | Poule  | 1986-06-08\n Danemark            | RFA                 |    2 | Poule  | 1986-06-13\n Irlande du Nord     | Espagne             |    3 | Poule  | 1986-06-07\n Alg\u00e9rie             | Irlande du Nord     |    2 | Poule  | 1986-06-03\n RFA                 | Mexique             |    0 | 1/4    | 1986-06-21\n URSS                | Hongrie             |    6 | Poule  | 1986-06-02\n Mexique             | Paraguay            |    2 | Poule  | 1986-06-07\n Belgique            | Espagne             |    2 | 1/4    | 1986-06-22\n Irak                | Mexique             |    1 | Poule  | 1986-06-11\n Espagne             | Br\u00e9sil              |    1 | Poule  | 1986-06-01\n Angleterre          | Maroc               |    0 | Poule  | 1986-06-06\n Irlande du Nord     | Br\u00e9sil              |    2 | Poule  | 1986-06-12\n Maroc               | RFA                 |    1 | 1/8    | 1986-06-17\n Belgique            | Mexique             |    3 | Poule  | 1986-06-03\n Bulgarie            | Italie              |    2 | Poule  | 1986-05-31\n \u00c9cosse              | Uruguay             |    0 | Poule  | 1986-06-13\n Alg\u00e9rie             | Espagne             |    3 | Poule  | 1986-06-12\n Argentine           | Belgique            |    2 | 1/2    | 1986-06-25\n Br\u00e9sil              | Pologne             |    4 | 1/8    | 1986-06-16\n Danemark            | Uruguay             |    7 | Poule  | 1986-06-08\n Cor\u00e9e               | Italie              |    5 | Poule  | 1986-06-10\n Canada              | France              |    1 | Poule  | 1986-06-01\n Argentine           | Uruguay             |    1 | 1/8    | 1986-06-16\n France              | RFA                 |    2 | 1/2    | 1986-06-25\n France              | URSS                |    2 | Poule  | 1986-06-05\n Uruguay             | RFA                 |    2 | Poule  | 1986-06-04\n Angleterre          | Pologne             |    3 | Poule  | 1986-06-11\n Portugal            | Angleterre          |    1 | Poule  | 1986-06-03\n \u00c9cosse              | Danemark            |    1 | Poule  | 1986-06-04\n Angleterre          | Paraguay            |    3 | 1/8    | 1986-06-18\n Hongrie             | Canada              |    2 | Poule  | 1986-06-06\n Argentine           | Cor\u00e9e               |    4 | Poule  | 1986-06-02\n Pologne             | Portugal            |    1 | Poule  | 1986-06-07\n RFA                 | \u00c9cosse              |    3 | Poule  | 1986-06-08\n Mexique             | Bulgarie            |    2 | 1/8    | 1986-06-15\n URSS                | Canada              |    2 | 
Poule  | 1986-06-09\n Espagne             | Danemark            |    6 | 1/8    | 1986-06-18\n Paraguay            | Irak                |    1 | Poule  | 1986-06-04\n Argentine           | Bulgarie            |    2 | Poule  | 1986-06-10\n Argentine           | Angleterre          |    3 | 1/4    | 1986-06-22\n Cor\u00e9e               | Bulgarie            |    2 | Poule  | 1986-06-05\n(51 rows)\n
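A possible definition of the view (a sketch assuming the Match columns from Exercise 1.1):
CREATE VIEW matchbutsglobal AS\nSELECT paysl, paysv, butsl + butsv AS buts, type, date\nFROM match;\n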

Exercise 3.2: Write a query which calculates the average number of goals scored across all the matches of the French team.

    Response
        Moyenne buts\n--------------------\n 2.0000000000000000\n(1 row)\n

    Exercise 3.3: Write a query which calculates the total number of goals scored only by the French team.

    Response
     buts \n------\n    8\n(1 row)\n

Exercise 3.4: Write a query which calculates the total number of goals scored in the Poule (group stage) matches of each group. Order the results by group.

    Response
     groupe | sum \n--------+-----\n A      |  17\n B      |  14\n C      |  16\n D      |  12\n E      |  15\n F      |   9\n(6 rows)\n
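One possible query (a sketch; joining on paysl is enough here because both teams of a group-stage match belong to the same group):
SELECT groupe, SUM(buts)\nFROM matchbutsglobal\nJOIN pays ON paysl = nom\nWHERE type = 'Poule'\nGROUP BY groupe\nORDER BY groupe;\n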

Exercise 3.5: Write a function vainqueur which takes in the two countries of a match and the match type and which returns the winner. Apply your function to the following pairs:

    SELECT * FROM vainqueur('Espagne', 'Danemark', '1/8');\nSELECT * FROM vainqueur('Br\u00e9sil', 'France', '1/4');\n
    Response
     vainqueur \n-----------\n Espagne\n(1 row)\n\n vainqueur \n-----------\n Match nul\n(1 row)\n
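One possible implementation (a plpgsql sketch assuming the Match columns from Exercise 1.1, not necessarily the official solution):
CREATE OR REPLACE FUNCTION vainqueur(p1 VARCHAR, p2 VARCHAR, t VARCHAR)\nRETURNS VARCHAR AS $vainqueur$\nDECLARE\n    bl INTEGER;\n    bv INTEGER;\nBEGIN\n    -- look up the score of the match between these two teams for this match type\n    SELECT butsl, butsv INTO bl, bv FROM match\n     WHERE paysl = p1 AND paysv = p2 AND type = t;\n    IF bl > bv THEN\n        RETURN p1;\n    ELSIF bv > bl THEN\n        RETURN p2;\n    ELSE\n        RETURN 'Match nul';\n    END IF;\nEND;\n$vainqueur$ LANGUAGE plpgsql;\n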

Exercise 3.6: Write a function butsparequipe which returns the total and the average number of goals scored by a team. Apply your function to the French team. Bonus points for making the result display the name of the team.

    SELECT * FROM butsparequipe('France');\n
    Response
      pays  | total |      moyenne \n--------+-------+--------------------\n France |     8 | 1.3333333333333333\n(1 row)\n

Exercise 3.7: Using the butsparequipe function, write a query which lists all countries and the goals they scored.

    Response
            pays         | total \n---------------------+-------\n Argentine           |    14\n Italie              |     5\n Bulgarie            |     2\n Cor\u00e9e               |     4\n Mexique             |     6\n Paraguay            |     4\n Belgique            |    10\n Irak                |     1\n URSS                |    12\n Hongrie             |     2\n France              |     8\n Canada              |     0\n Br\u00e9sil              |     9\n Espagne             |    11\n Irlande du Nord     |     2\n Alg\u00e9rie             |     1\n Danemark            |    10\n RFA                 |     8\n Uruguay             |     2\n \u00c9cosse              |     1\n Maroc               |     3\n Angleterre          |     7\n Pologne             |     1\n Portugal            |     2\n(24 rows)\n

Exercise 3.8: Using the butsparequipe function, write a query which shows the country which scored the most goals and the number of goals they scored.

    Response
       pays    | total \n-----------+-------\n Argentine |    14\n(1 row)\n
    "},{"location":"0_3_postgres.html#pull-the-trigger","title":"Pull the trigger","text":"

    In this exercise, we're going to create a TRIGGER, a mechanism which allows for automatically executing actions when an event occurs.

    Create the db-trigger database.

    $ createdb db-trigger\n

Exercise 4.1: Create a table rel(nom, valeur) where nom is a character string and valeur is an integer. nom will be the primary key

    Solution
    CREATE TABLE IF NOT EXISTS rel (\n    nom VARCHAR(20),\n    valeur INTEGER,\n    PRIMARY KEY (nom)\n);\n

    Exercise 4.2: Add 5 tuples into the table

    Solution
    INSERT INTO rel VALUES\n       ('Alice', 10),\n       ('Bob', 5),\n       ('Carl', 20),\n       ('Denise', 11),\n       ('Esther', 6);\n

Exercise 4.3: Write a trigger such that, when adding new tuples, the average value of valeur cannot decrease. If a new tuple is added which would decrease the average, an exception should be raised.

    The following insertion should work:

    INSERT INTO rel VALUES ('Fab', 15);\n\nSELECT * FROM rel;\n

    As we can see, the (Fab, 15) tuple was added:

      nom   | valeur \n--------+--------\n Alice  |     10\n Bob    |      5\n Carl   |     20\n Denise |     11\n Esther |      6\n Fab    |     15\n(6 rows)\n

    However, the following insertion should give an exception:

    INSERT INTO rel VALUES ('Guy', 2);\n
    Solution
    CREATE OR REPLACE FUNCTION verifier_moyenne()\n                  RETURNS trigger AS $verifier_moyenne$\n    DECLARE\n      moyenne FLOAT;\n      nb      INTEGER;\n    BEGIN\n        moyenne := AVG(valeur) FROM rel;\n        nb := COUNT(*) FROM rel;\n\n        IF ((nb * moyenne + NEW.valeur) / (nb + 1)) < moyenne THEN\n            RAISE EXCEPTION 'problem with insertion: valeur average is decreasing!';\n        END IF;\n\n        RETURN NEW;\n    END;\n$verifier_moyenne$ LANGUAGE plpgsql;\n\nCREATE TRIGGER VerificationMoyenne\nBEFORE INSERT ON rel\nFOR EACH ROW\nEXECUTE PROCEDURE verifier_moyenne();\n
    "},{"location":"0_4_project.html","title":"Databases Project","text":"

    This project is detailed in the ETL class.

    You are part of a 4-person data engineering team at a startup, tasked with designing and implementing an ETL/ELT pipeline. Your assignment is to submit a 2-4 page report detailing the choices made for the ETL/ELT pipeline and to provide a demo of an example database.

    In your report, you need to clearly explain and justify your decisions for each phase of the pipeline:

    1. Extract (E): Identify and explain where the data is coming from. Discuss the sources and why they were chosen.

    2. Transform (T): Explain how the data is being transformed. Describe the processes, tools, and techniques used to clean, aggregate, or modify the data to make it useful for its intended purpose.

    3. Load (L): Detail how the data is loaded into the system, how it is stored, and how it will be used or queried. Discuss the database or storage options chosen, and explain how the data will be utilized by the organization or application.

    Along with the report, you are expected to provide a demo of an example database. You can use PostgreSQL, MongoDB, or another database system of your choice. The demo should include:

    • Documented scripts to load and manipulate example data that demonstrates the choices made for the ETL pipeline.
    • The data used in the demo does not need to be exhaustive, but it should be sufficient to illustrate the key decisions in the ETL process.
    "},{"location":"0_4_project.html#grading-criteria","title":"Grading Criteria:","text":"
    • Report Rigor (6 points): Depth and thoroughness in explaining your ETL/ELT choices.
    • Report Clarity (6 points): How clearly and effectively your report communicates the ETL/ELT pipeline.
    • Demo Data (4 points): Appropriateness and accuracy of the example data used in the demo.
    • Demo Manipulation (4 points): Functionality and quality of the data manipulation demonstrated in the example.
    "},{"location":"0_4_project.html#deadline","title":"Deadline:","text":"
    • The report and demo must be submitted by October 11, 2024, end of day to the LMS.
    "},{"location":"1_1_overview.html","title":"Data Computation Part 1: Cloud Computing, Containers & Deployment","text":""},{"location":"1_1_overview.html#syllabus","title":"Syllabus","text":""},{"location":"1_1_overview.html#introduction","title":"Introduction","text":"

    Introduction to data computation module

    Link to slides

    "},{"location":"1_1_overview.html#cloud-computing-remote-development-3h","title":"Cloud Computing & Remote Development (3h)","text":"

    Intro to cloud computing & remote development environments

    "},{"location":"1_1_overview.html#google-cloud-platform-3h","title":"Google Cloud Platform (3h)","text":"

    Discover Google Cloud Platform with your student credits !

    "},{"location":"1_1_overview.html#containers-3h","title":"Containers (3h)","text":"

    Intro to containers & docker

    "},{"location":"1_1_overview.html#be-gcp-containers-3h","title":"BE : GCP & Containers (3h)","text":"

    A small workshop that puts everything together: Google cloud & docker

    "},{"location":"1_1_overview.html#be-deploy-your-ml-model-in-production-3h","title":"BE : Deploy your ML model in production (3h)","text":"

    Deploy your machine learning model in production with everything you've learnt

    We will then switch to the introduction to orchestration and kubernetes lectures

    "},{"location":"1_1_overview.html#quiz-and-recap","title":"Quiz and recap","text":"

    The evaluation of this section will be done with an open-resource quiz covering all cloud computing topics.

    The conclusion slides should be used to recap the previous courses.

    "},{"location":"1_2_cloud.html","title":"Cloud Computing & Remote Development Environment","text":""},{"location":"1_2_cloud.html#cloud-computing","title":"Cloud Computing","text":"

    Link to slides

    "},{"location":"1_2_cloud.html#remote-development","title":"Remote Development","text":"

    Link to slides

    "},{"location":"1_2_setup_codespace.html","title":"Remote Development hands-on","text":""},{"location":"1_2_setup_codespace.html#1-abstract","title":"1. Abstract","text":"

    Abstract

In this hands-on you will start using a Github Codespace remote development environment to get familiar with manipulating code and data that are not stored on your computer. We will also discover streamlit, a python library used to build front-ends, and see how to preview things running in the github codespace from your machine

    Warning

Some things may only work on eduroam or on 4G... Some things may only work on Google Chrome

    Warning

    Don't forget to shutdown everything when you're done !

    Note

When the TP says to replace \"{something}\" with a name, don't include the brackets, so write \"yourname\"

    "},{"location":"1_2_setup_codespace.html#1-my-first-virtual-machine-github-codespaces","title":"1. My first \"Virtual Machine\", Github Codespaces","text":"

    First, you will need a GitHub account. You should already have one, otherwise create one.

    "},{"location":"1_2_setup_codespace.html#intro-to-github-codespaces","title":"Intro to Github Codespaces","text":"
  • Github Codespaces is a \"managed VM\" made available so you can develop without having to configure your environment locally.
  • Compared to configuring a VM yourself, this one comes preloaded with developer tools, and is thus faster to get started with.
  • You have a free tier of 60 CPU hours per month and some disk space.
  • You pay for the CPU when the VM is on and for the disk for as long as the codespace exists.

    Have a look at the overview : https://docs.github.com/en/codespaces/overview

    Question

    • Can you describe it with your own words ?
    • How would ChatGPT (or any LLM) describe it ?

    Note

    Google Cloud has a similar service with Google Cloud Shell but since Codespaces is way more powerful, we will be using that

    "},{"location":"1_2_setup_codespace.html#create-your-codespace-and-connect-to-it","title":"Create your codespace and connect to it","text":"

    Go to https://github.com/fchouteau/isae-cloud-computing-codespace

    • Click on the top left corner for a new codespace
    • It should launch a browser with a vscode
    • Launch a terminal using the top right menu

    If that does not work, go to https://github.com/github/codespaces-blank and create a codespace from there

You should arrive at a VScode instance

    Question

    • Where is it running ?

    If you go to the core page of https://github.com/codespaces you should see your codespace running

    "},{"location":"1_2_setup_codespace.html#explore-github-codespaces","title":"Explore github codespaces","text":"

    Github Codespace Getting Started

    Identify the following features in the interface

    Code editor (e.g., VS Code)\nTerminal\nFile explorer\nDebugging tools (e.g., breakpoints, console output)\n

    You can then carry these commands in order to get a feel of the \"computer\" behind

    • Check available disk space
    Bash command to run

    df -h

    • Check the OS name
    Bash command to run

    cat /etc/os-release

    • Check the CPU model
    Bash command to run

    cat /proc/cpuinfo

  • This is the hardware model... How many cores do you have available? How much RAM?
    Help

    htop will give you your current usage and available cores, or you can do nproc

    • Try and upload a file from your computer to the codespace by right clicking on the file explorer on the left

    • Create a new file and write a simple python \"Hello World\", then execute it from the terminal

    "},{"location":"1_2_setup_codespace.html#a-demo-of-codespace-port-forwarding-web-preview","title":"A demo of codespace port forwarding / web preview","text":"
    • In your codespace, run jupyter lab to launch the jupyter lab installed in it
    • Check the \"port\" preview : It should have a new entry with the 8888 port. If not, create it manually
    • Click on open in browser
    • Copy the token from your terminal to the web browser
  • You are now in a jupyterlab hosted on your github codespace VM!

    Question

    Magic !? What do you think is happening ? Try to describe it with your own words

    • Cancel (CTRL+C) the jupyter process

    To learn more about port forwarding in codespaces, refer to the documentation

    "},{"location":"1_2_setup_codespace.html#2-running-your-notebooks-in-the-vm","title":"2. Running your notebooks in the VM","text":"

    As an exercise, you will setup your development environment in the codespace and run an MLClass Notebook inside the VM,

    • Transfer a notebook you are working on from your computer
    • Transfer the data as well if it's not downloaded
    • Setup your environment using pip, conda, etc... as you would do in your local machine
    • Run jupyter lab or jupyter notebook from your codespace and connect to it like previously
    • You can continue your script / etc...

    If you don't have anything at hand you can use this simple repo as an example (you will see that later on your DL classes) : https://github.com/pytorch/examples/tree/main/mnist

    Question

    How comfortable do you feel with this remote machine ? Is it easy to get data in or out ? Code in or out ?

    "},{"location":"1_2_setup_codespace.html#3-lets-discover-streamlit","title":"3. Let's discover Streamlit","text":"

    We will now introduce streamlit, which is a very nice tool to build quick webapps in python !

    In this TP you will build your first interactive webapp in python and preview it using codespace. This will help you get a feel of using the remote vscode

    First, look at this video,

    Your browser does not support the video tag.

    Then, take a look at an introduction to streamlit and the streamlit application gallery

    Question

    Can you describe what exactly is streamlit ? Could you find any way it could be useful to you ?

    "},{"location":"1_2_setup_codespace.html#31-your-first-streamlit-application","title":"3.1. Your first streamlit application","text":"

    Take a look at the code below,

    import streamlit as st\nfrom streamlit_image_comparison import image_comparison\nimport cv2\n\nst.set_page_config(\"Webb Space Telescope vs Hubble Telescope\", \"\ud83d\udd2d\")\n\nst.header(\"\ud83d\udd2d J. Webb Space Telescope vs Hubble Telescope\")\n\nst.write(\"\")\n\"This is a reproduction of the fantastic [WebbCompare](https://www.webbcompare.com/index.html) app by [John Christensen](https://twitter.com/JohnnyC1423). It's built in Streamlit and takes only 10 lines of Python code. If you like this app, please star [John's original repo](https://github.com/JohnEdChristensen/WebbCompare)!\"\nst.write(\"\")\n\nst.markdown(\"### Southern Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/southern_nebula_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/southern_nebula_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\n\nst.markdown(\"### Galaxy Cluster SMACS 0723\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/deep_field_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/deep_field_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Carina Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/carina_2800.png\",\n    img2=\"https://www.webbcompare.com/img/webb/carina_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Stephan's Quintet\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/stephans_quintet_2800.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/stephans_quintet_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n

    Question

    Can you describe, by reading the documentation, what does the code do ?

    "},{"location":"1_2_setup_codespace.html#32-local-deployment-in-codespace","title":"3.2. Local deployment in codespace","text":"

    First, we will install in the codespace the dependencies for our application,

pip install streamlit opencv-python-headless streamlit-image-comparison

    Then create a file streamlit_jswt.py and copy/paste the code above.

    Then execute it streamlit run streamlit_jswt.py

    This will launch the application on the port 8501 (by default) of our codespace. You can connect to it as usual.

    \ud83e\udd29 Nice, isn't it ?

    Now you can quit the server.

    "},{"location":"1_2_setup_codespace.html#33-a-more-complex-application","title":"3.3. A more complex application","text":"

    We will run and package a more complex application, but a lot more useful for your deep learning class.

    If you started your github codespace from the isae cloud computing codespace, you should have a folder called demo-streamlit-activation-function.

    Otherwise, clone the repository git clone https://github.com/fchouteau/isae-cloud-computing-codespace.git

    cd to the directory cd isae-demo-streamlit-activation-functions then as last time, install the dependencies pip install -r requirements.txt then run the application streamlit run app.py

    You can visualize it as last time. This should be quite useful for you given you just left (or will just start, it's early in the year...) the Deep Learning Class !

    "},{"location":"1_3_gcp_handson.html","title":"Google Cloud Platform Hands-on","text":""},{"location":"1_3_gcp_handson.html#0-abstract","title":"0. Abstract","text":"

    Abstract

In this hands-on you will configure your GCP account and the google cloud SDK, and access the cloud console using Google Cloud Shell. You will also discover a very useful tool, a managed jupyter notebook service from google named Google Colab, which may be very important for your future developments this year

    Warning

    Some things may only work on eduroam or in 4G...

    Warning

    Don't forget to shutdown everything when you're done since it costs you money. At the end, even if you have not finished the TP, go to the section 8 \"Cleaning Up\"

    Tip

When the TP says to replace \"{something}\" with a name, don't include the brackets, so write \"yourname\"

    Tip

    If you are lost on where you are, normally the terminal has the hostname indicated, otherwise run the command hostname

    "},{"location":"1_3_gcp_handson.html#1-create-your-gcp-account","title":"1. Create your GCP Account","text":"

    Note

    You should have already done that last week

    Here you will each create a Google Cloud Platform account and project using the student credits given this year.

    Overview link

    • Create an account within Google Cloud Platform using your ISAE e-mail
    • Use the code given by Dennis to redeem your free credits
    • You should have a free tier available to you as well as coupons
    • From the interface you should create a project with a name of your choice (it is recommended to put for example sdd2425-yourname so that it is clear)
    "},{"location":"1_3_gcp_handson.html#2-reconnect-to-github-codespaces","title":"2. (re)connect to GitHub Codespaces","text":""},{"location":"1_3_gcp_handson.html#if-you-still-have-your-codespace-from-last-time","title":"If you still have your codespace from last time","text":"

    If you go to the main page of https://github.com/codespaces and you see an existing codespace from last week, you can restart it using the (...) menu

    If you don't have one, recreate it (see below)

    "},{"location":"1_3_gcp_handson.html#create-your-codespace-and-connect-to-it","title":"Create your codespace and connect to it","text":"

    Go to https://github.com/fchouteau/isae-cloud-computing-codespace

    • Click on the top left corner to create a new codespace
    • It should launch a browser tab with a VSCode instance
    • Launch a terminal using the top right menu

    If that does not work, go to https://github.com/github/codespaces-blank and create a codespace from there

    You should arrive at a VSCode instance

    If you go to the main page of https://github.com/codespaces you should see your codespace running

    "},{"location":"1_3_gcp_handson.html#3-install-google-cloud-sdk-configure-the-shell","title":"3. Install Google Cloud SDK & Configure the shell","text":"

    If you want to interact with GCP from your computer or codespaces, you will need to install the Google Cloud SDK, which will also install a shell if you are on Windows

    Warning

    If you have a codespace cloned from mine, the Google Cloud SDK is already installed. Try gcloud to check that, and skip this step if the command returns something

    Note

    You can install the Google Cloud SDK locally, but I recommend using your codespace

    Installing locally

    The best way to interact with the Google Cloud SDK is through a terminal, so install it as follows depending on your OS:

    • Ubuntu / Debian https://cloud.google.com/sdk/docs/install#deb
    • Other Linux (either VM or native): https://cloud.google.com/sdk/docs/install#linux
    • MacOS: https://cloud.google.com/sdk/docs/install#mac
    • Windows Subsystem for Linux: see Linux
    • Windows: https://cloud.google.com/sdk/docs/install#windows
    Installing on codespace

    If you are on codespace, run the commands below to install the gcloud tool to your machine

    Note : If you used the custom codespace, it should already be installed, try gcloud init directly

    echo \"deb https://packages.cloud.google.com/apt cloud-sdk main\" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list\ncurl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -\nsudo apt-get update && sudo apt-get install google-cloud-cli\n

    Then run gcloud init in your terminal to configure the google cloud sdk with your account

    You should at some point see a link in the terminal. Click on the link and log in with your Google account, then copy the token back to your codespace.

    Your github codespace is now configured with your google cloud platform credentials
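
    As a quick sanity check (nothing here is specific to this TP, these are standard gcloud commands), you can verify which account and project the SDK is now using:

    gcloud auth list                      # which account is active\ngcloud config get-value project       # which project is configured\n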

    "},{"location":"1_3_gcp_handson.html#4-my-first-google-compute-engine-instance","title":"4. My first Google Compute Engine Instance","text":"

    First, we will take our first steps by creating a compute engine instance (a VM) using the console, connecting to it via SSH, interacting with it and uploading some files; then we will shut it down and make the magic happen by resizing it

    • What is Google Compute Engine? Try to describe it with your own words
    "},{"location":"1_3_gcp_handson.html#4a-creating-my-vm-using-the-console-the-gui","title":"4a. Creating my VM using the console (the GUI)","text":"
    • Create your VM from the google cloud interface : Go to this link and follow the \"CONSOLE\" instruction

    • Create an instance with the following parameters

      • type: n1-standard-1
      • zone: europe-west1-b (Belgium)
      • os: ubuntu 22.04 x86
      • boot disk size: 10 GB
      • boot disk type: pd-standard
    • Give it a name of your choice (that you can remember)
    • DO NOT SHUT IT DOWN for now
    Note

    If you were using the command line, you would have done this

    gcloud compute instances create {name} --project={your-project} --zone={your-zone} \\\n  --machine-type=n1-standard-1 \\\n  --image=ubuntu-2204-jammy-v20231030 \\\n  --image-project=ubuntu-os-cloud \\\n  --create-disk=auto-delete=yes,boot=yes,device-name=dev-instance-{index},image=projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20231030,mode=rw,size=10,type=projects/{your-project}/zones/{your-zone}/diskTypes/pd-standard\n
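
    Whichever way you created the instance, you can check that it exists and is running with a quick command (a minimal example, assuming the SDK is configured as in section 3):

    gcloud compute instances list\n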
    "},{"location":"1_3_gcp_handson.html#4b-connecting-to-ssh","title":"4b. Connecting to SSH","text":"
    • Connect to ssh from the github codespace

      Solution

      gcloud compute ssh ${MACHINE-NAME}

      Note

      We are using gcloud compute ssh instead of plain ssh. This is an automated tool that takes care of locating your machine in GCP and transferring the SSH keys

    • Check available disk space

      Solution

      df -h

    • Check the OS name

      Solution

      cat /etc/os-release

    • Check the CPU model

      Solution

      cat /proc/cpuinfo

    • Check the number of cores available and the RAM

      Solution

      htop

    "},{"location":"1_3_gcp_handson.html#4c-the-magic-of-redimensioning-vms","title":"4c. The magic of redimensioning VMs","text":"
    • Shutdown the VM (from the web browser), check the previous codelab to see how to do it
    • Select it and click on EDIT
    • Change the machine type to n1-standard-2 (link to documentation)
    • Relaunch it, reconnect to it and try to check using htop the number of cores & RAM available
    • Note : If you run cat /proc/cpuinfo again you will see that you are running on the same hardware !

    Magic isn't it ?

    Note: If you had any files and specific configuration, they would still be here !
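
    For reference, the same resize can be done from the command line once the VM is stopped (a minimal sketch; {name} and {your-zone} are placeholders to adapt):

    gcloud compute instances stop {name} --zone={your-zone}\ngcloud compute instances set-machine-type {name} --zone={your-zone} --machine-type=n1-standard-2\ngcloud compute instances start {name} --zone={your-zone}\n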

    "},{"location":"1_3_gcp_handson.html#4d-transfering-files-from-the-computer-or-codespaces-to-this-machine","title":"4d. Transfering files from the computer (or codespaces) to this machine","text":"
    • We will use the terminal to transfer some files from your computer (or codespace) to this machine,
    • If you use cloud shell you can do it as well : create a dummy file in cloud shell

    • Follow this link to learn how to use the gcloud CLI tool to transfer files to your instance

    • For experts, it's possible to do it manually using rsync from ssh or scp

    • Transfer some files to your /home/${USER} directory

    • List them from your instance (ls)
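
    As an illustration, a single file transfer from the codespace to the VM could look like this (a minimal sketch; the file name, instance name and zone are placeholders):

    gcloud compute scp ./myfile.txt {instance-name}:/home/${USER}/ --zone={your-zone}\n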

    How do we do the opposite ?

    See section 5.

    "},{"location":"1_3_gcp_handson.html#4e-persistent-ssh-sessions-with-tmux","title":"4e. Persistent SSH sessions with TMUX","text":"
    • Connect to your GCE instance using SSH from the codespace
    • Question: What happens if you start a long computation and disconnect ?
    • Check that tmux is installed on the remote instance (run tmux). If not, install it
    • Follow this tutorial: https://www.hamvocke.com/blog/a-quick-and-easy-guide-to-tmux/
    • To check you have understood you should be able to:
      • Connect to your remote instance with ssh
      • Start a tmux session
      • Launch a process (for example htop) inside it
      • Detach from the session (CTRL+B then d, or CTRL+B then type :detach)
      • Kill the ssh connection
      • Connect again
      • tmux attach to your session
      • Your process should still be here !
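
    For reference, the tmux commands used in this flow look roughly like this (a minimal sketch; the session name is an arbitrary example):

    tmux new -s work        # start a named session\nhtop                    # launch a long-running process inside it\n# press CTRL+B then d to detach, then you can safely close the ssh connection\ntmux attach -t work     # after reconnecting via ssh, re-attach to the session\n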

    Congratulations :)

    "},{"location":"1_3_gcp_handson.html#5-interacting-with-google-cloud-storage","title":"5. Interacting with Google Cloud Storage","text":"

    Here we will discover google cloud storage, upload some files from your computer and download them from your instance in the cloud

    • What is Google Cloud Storage ? Try to describe it with your own words

    • Use this tutorial to upload something from your computer to google cloud storage from the web browser (DO NOT DELETE THE FILES YET)

    Now we will download it using the google cloud CLI tool. Here's the documentation

    Follow the tutorial to learn how to do what you just did, but this time using gsutil from your codespace

    • List the content of the bucket you just created (if you deleted it previously, create a new one)
    • Upload a file to a bucket
    • Download a file from a bucket
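
    If you prefer the newer gcloud storage commands over gsutil, the same three operations look like this (a minimal sketch; the bucket and file names are placeholders):

    gcloud storage ls gs://{your-bucket}/\ngcloud storage cp ./myfile.txt gs://{your-bucket}/\ngcloud storage cp gs://{your-bucket}/myfile.txt .\n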

    Optional : What if we want to do the same from the GCE instance ?

    • Now go back to your machine

    • Try to list bucket, download and upload files

    • Is it possible ?

    • If not, it's because you have to allow the instance to access google cloud storage

    • Shutdown the VM and edit it (like we did when we resized the instance)

    • Check \"access scopes\", select \"set access for each api\", and select \"storage / admin\"

    • Now restart your machine and connect back to it. You should now be able to upload files to Google Cloud Storage

    • You can delete the VM as well, we will not use it

    "},{"location":"1_3_gcp_handson.html#6-deep-learning-vm-ssh-and-port-forwarding","title":"6. Deep Learning VM, SSH and Port Forwarding","text":""},{"location":"1_3_gcp_handson.html#6a-deep-learning-vm","title":"6a. deep learning vm","text":"

    Here we will use the google cloud sdk to create a more complex VM with a pre-installed image and connect to its jupyter server

    Google Cloud Platform comes with a set of services targeted at data scientists called AI Platform, among them are Deep Learning VMs which are essentially preinstalled VMs (more or less the same configuration as google colab) with some bonuses.

    • What are \"Deep Learning VMs\" ? Try to use your own words
    • What would be the alternative if you wanted to get a machine with the same installation ?
    "},{"location":"1_3_gcp_handson.html#6b-create-a-google-compute-engine-instance-using-the-command-line","title":"6b. create a google compute engine instance using the command line","text":"

    Instead of using the browser to create this machine, we will be using the CLI to create instances

    export INSTANCE_NAME=\"fch-dlvm-1\" # <--- RENAME THIS !!!!!!!!!!\n\ngcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=\"TERMINATE\" \\\n        --scopes=\"storage-rw\" \\\n        --machine-type=\"n1-standard-1\" \\\n        --boot-disk-size=\"50GB\" \\\n        --boot-disk-type=\"pd-standard\"\n
    • Notice the similarities between the first VM you created and this one,
    • What changed ?
    • If you want to learn more about compute images, image families etc... go here
    "},{"location":"1_3_gcp_handson.html#6c-connect-with-ssh-to-this-machine-with-port-forwarding","title":"6c. connect with ssh to this machine with port forwarding","text":"
    • Connect to your instance using the gcloud cli & ssh from the codespace with port forwarding

    • Forward the port 8888 when you're connecting to the instance

    • The documentation explains how to forward ports as well

    Solution

    gcloud compute ssh user@machine-name --zone=europe-west1-b -- -L 8888:localhost:8888

    If you are in codespace, use the port forwarding utility, add a new port (8888). It may be done automatically.

    • Explore the machine the same way we did previously

    • You can see you have a conda environment installed. Try to query the list of things installed

    Solution

    conda list or pip list

    • Is (py)torch installed? If not, install it
    Solution

    pip list | grep torch then, if needed: pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

    "},{"location":"1_3_gcp_handson.html#6d-run-jupyter-lab-on-the-gce-vm","title":"6d. Run jupyter lab on the GCE VM","text":"
    • In the GCE VM, run jupyter lab

    • Copy the credentials

    • Connect to port 8888 of the GitHub Codespace. You should be redirected to a jupyter instance
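
    If jupyter lab does not start automatically on the Deep Learning VM, a minimal way to launch it for the first step above is the following (standard jupyter flags, nothing specific to this image):

    jupyter lab --no-browser --port=8888\n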

    Question

    Where are we ? Where is the jupyter lab hosted ? What is the difference between this and the jupyter lab we launched from codespace last week ?

    Don't disconnect from the VM, we will continue below

    "},{"location":"1_3_gcp_handson.html#7-end-to-end-example","title":"7. End to end example","text":"

    We will replicate the following setup (simplified)

    • Your development machine (the github codespace) has some training code
    • You have a \"high performance\" machine in the cloud
    • You want to transfer the training code to the VM
    • You want to run the training in a remote machine
    • Once the training is done you want to upload the model weights to google cloud storage

    • In your codespace, in a new folder (eg. training), copy the content of this

    Solution

    gcloud compute scp --recurse training ${USER}@{MACHINE}:/home/${USER}/

    You should find it on your GCE VM

    • Run it using python train.py --epochs 1 --save-model.

    It should train a neural network on the MNIST dataset. BONUS : Run it inside a tmux session ;)

    • Once it has finished, you should see a new file, the model weights mnist_cnn.pt

    • From the GCE VM : Upload the weights to the google cloud storage bucket you previously created

    Solution

    gcloud storage cp mnist_cnn.pt gs://(...)

    • From the GitHub Codespace: Download the model weights from Google Cloud Storage
    Solution

    gcloud storage cp gs://(...) mnist_cnn.pt

    Success

    yay ! Don't forget to cleanup

    "},{"location":"1_3_gcp_handson.html#8-important-cleaning-up","title":"8. IMPORTANT : Cleaning up","text":"

    Warning

    • DELETE ALL THE BUCKETS YOU CREATED
    • DELETE ALL THE GCP INSTANCES YOU CREATED
    • SHUTDOWN YOUR CODESPACE
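
    If you prefer doing the cleanup from the command line, a minimal sketch could look like this ({instance-name}, {your-zone} and {your-bucket} are placeholders; double-check in the web console that nothing is left running):

    gcloud compute instances list\ngcloud compute instances delete {instance-name} --zone={your-zone}\ngcloud storage rm --recursive gs://{your-bucket}/\n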

    How to shutdown codespaces :

    • Click on stop codespace to shut it down (you \"pay\" for the disk with your free credits)
    • Click on kill codespace to delete it
    "},{"location":"1_3_gcp_handson.html#9-optional-introduction-to-infrastructure-as-code","title":"9. Optional - Introduction to infrastructure as code","text":"
    • This tutorial will guide you through google cloud deployment manager, which is a way to deploy google compute engine instances using configuration files

    • Don't forget to adapt machine configurations and zone to your use case (see above)

    If you run this, don't forget to clean everything up afterwards

    "},{"location":"1_3_gcp_handson.html#10-optional-managed-database","title":"10. Optional - Managed Database","text":"
    • I think you've just done a class on SQL databases

    • Here are the managed SQL services of google cloud

    Question

    Can you describe what it is ? What do you pay to google ? How much does it cost ? What is a managed service in cloud vocabulary ?

    • If you still have some code to interact with a database, you can try launching one here and redoing your classes
    "},{"location":"1_3_gcp_lecture.html","title":"Google Cloud Platform","text":"

    Link to slides

    "},{"location":"1_4_containers.html","title":"From Virtualisation to Containerisation","text":"

    Link to slides

    "},{"location":"1_4_docker_tp.html","title":"Docker: Hands on","text":"

    Note

    If you are lost, docker system prune will remove dangling images and stopped containers

    "},{"location":"1_4_docker_tp.html#0-how-to-run-this","title":"0. How to run this ?","text":"

    Abstract

    We will discover the basics of docker and you will be able to manipulate your first images and containers !

    You should be inside the Github Codespace you created and have google cloud SDK installed in it

    If not, refer to the previous tutorial and do steps 2 and 3

    This codespace has everything you need, including docker

    If you want to do everything from your linux machine you can install docker but I don't recommend it for now

    "},{"location":"1_4_docker_tp.html#1-manipulating-docker-for-the-1st-time","title":"1. Manipulating docker for the 1st time","text":"

    Source: https://github.com/docker/labs

    To get started, let's run the following in our terminal:

    $ docker pull alpine\n

    The pull command fetches the alpine image from the Docker registry and saves it in our system. You can use the docker images command to see a list of all images on your system.

    $ docker images\nREPOSITORY              TAG                 IMAGE ID            CREATED             VIRTUAL SIZE\nalpine                 latest              c51f86c28340        4 weeks ago         1.109 MB\nhello-world             latest              690ed74de00f        5 months ago        960 B\n

    "},{"location":"1_4_docker_tp.html#11-docker-run","title":"1.1 Docker Run","text":"

    Great! Let's now run a Docker container based on this image. To do that you are going to use the docker run command.

    $ docker run alpine ls -l\ntotal 48\ndrwxr-xr-x    2 root     root          4096 Mar  2 16:20 bin\ndrwxr-xr-x    5 root     root           360 Mar 18 09:47 dev\ndrwxr-xr-x   13 root     root          4096 Mar 18 09:47 etc\ndrwxr-xr-x    2 root     root          4096 Mar  2 16:20 home\ndrwxr-xr-x    5 root     root          4096 Mar  2 16:20 lib\n......\n......\n

    What happened? Behind the scenes, a lot of stuff happened. When you call run,

    1. The Docker client contacts the Docker daemon
    2. The Docker daemon checks the local store to see if the image (alpine in this case) is available locally, and if not, downloads it from Docker Store. (Since we have issued docker pull alpine before, the download step is not necessary)
    3. The Docker daemon creates the container and then runs a command in that container.
    4. The Docker daemon streams the output of the command to the Docker client

    When you run docker run alpine, you provided a command (ls -l), so Docker started the command specified and you saw the listing.

    Let's try something more exciting.

    $ docker run alpine echo \"hello from alpine\"\nhello from alpine\n
    OK, that's some actual output. In this case, the Docker client dutifully ran the echo command in our alpine container and then exited it. If you've noticed, all of that happened pretty quickly. Imagine booting up a virtual machine, running a command and then killing it. Now you know why they say containers are fast!

    Try another command.

    docker run alpine /bin/sh\n

    Wait, nothing happened! Is that a bug? Well, no. These interactive shells will exit after running any scripted commands, unless they are run in an interactive terminal - so for this example to not exit, you need to docker run -it alpine /bin/sh.

    You are now inside the container shell and you can try out a few commands like ls -l, uname -a and others. Exit out of the container by giving the exit command.

    Ok, now it's time to see the docker ps command. The docker ps command shows you all containers that are currently running.

    $ docker ps\nCONTAINER ID        IMAGE               COMMAND             CREATED             STATUS              PORTS               NAMES\n

    Since no containers are running, you see a blank line. Let's try a more useful variant: docker ps -a

    $ docker ps -a\nCONTAINER ID        IMAGE               COMMAND                  CREATED             STATUS                      PORTS               NAMES\n36171a5da744        alpine              \"/bin/sh\"                5 minutes ago       Exited (0) 2 minutes ago                        fervent_newton\na6a9d46d0b2f        alpine             \"echo 'hello from alp\"    6 minutes ago       Exited (0) 6 minutes ago                        lonely_kilby\nff0a5c3750b9        alpine             \"ls -l\"                   8 minutes ago       Exited (0) 8 minutes ago                        elated_ramanujan\nc317d0a9e3d2        hello-world         \"/hello\"                 34 seconds ago      Exited (0) 12 minutes ago                       stupefied_mcclintock\n

    What you see above is a list of all containers that you ran. Notice that the STATUS column shows that these containers exited a few minutes ago. You're probably wondering if there is a way to run more than just one command in a container. Let's try that now:

    $ docker run -it alpine /bin/sh\n/ # ls\nbin      dev      etc      home     lib      linuxrc  media    mnt      proc     root     run      sbin     sys      tmp      usr      var\n/ # uname -a\nLinux 97916e8cb5dc 4.4.27-moby #1 SMP Wed Oct 26 14:01:48 UTC 2016 x86_64 Linux\n
    Running the run command with the -it flags attaches us to an interactive tty in the container. Now you can run as many commands in the container as you want. Take some time to run your favorite commands.

    That concludes a whirlwind tour of the docker run command which would most likely be the command you'll use most often. It makes sense to spend some time getting comfortable with it. To find out more about run, use docker run --help to see a list of all flags it supports. As you proceed further, we'll see a few more variants of docker run.

    "},{"location":"1_4_docker_tp.html#12-terminology","title":"1.2 Terminology","text":"

    In the last section, you saw a lot of Docker-specific jargon which might be confusing to some. So before you go further, let's clarify some terminology that is used frequently in the Docker ecosystem.

    • Images - The file system and configuration of our application which are used to create containers. To find out more about a Docker image, run docker inspect alpine. In the demo above, you used the docker pull command to download the alpine image. When you executed the command docker run hello-world, it also did a docker pull behind the scenes to download the hello-world image.
    • Containers - Running instances of Docker images \u2014 containers run the actual applications. A container includes an application and all of its dependencies. It shares the kernel with other containers, and runs as an isolated process in user space on the host OS. You created a container using docker run which you did using the alpine image that you downloaded. A list of running containers can be seen using the docker ps command.
    • Docker daemon - The background service running on the host that manages building, running and distributing Docker containers.
    • Docker client - The command line tool that allows the user to interact with the Docker daemon.
    • Docker Store - A registry of Docker images, where you can find trusted and enterprise ready containers, plugins, and Docker editions. You'll be using this later in this tutorial.
    "},{"location":"1_4_docker_tp.html#20-webapps-with-docker","title":"2.0 Webapps with Docker","text":"

    Source: https://github.com/docker/labs

    Great! So you have now looked at docker run, played with a Docker container and also got the hang of some terminology. Armed with all this knowledge, you are now ready to get to the real stuff \u2014 deploying web applications with Docker.

    "},{"location":"1_4_docker_tp.html#21-run-a-static-website-in-a-container","title":"2.1 Run a static website in a container","text":"

    Note: Code for this section is in this repo in the website directory

    Let's start by taking baby-steps. First, we'll use Docker to run a static website in a container. The website is based on an existing image. We'll pull a Docker image from Docker Store, run the container, and see how easy it is to set up a web server.

    The image that you are going to use is a single-page website that was already created for this demo and is available on the Docker Store as dockersamples/static-site. You can download and run the image directly in one go using docker run as follows.

    docker run -d dockersamples/static-site\n

    Files:

    • Dockerfile
    • hello_docker.html

    Note: The current version of this image doesn't run without the -d flag. The -d flag enables detached mode, which detaches the running container from the terminal/shell and returns your prompt after the container starts. We are debugging the problem with this image but for now, use -d even for this first example.

    So, what happens when you run this command?

    Since the image doesn't exist on your Docker host, the Docker daemon first fetches it from the registry and then runs it as a container.

    Now that the server is running, do you see the website? What port is it running on? And more importantly, how do you access the container directly from our host machine?

    Actually, you probably won't be able to answer any of these questions yet! \u263a In this case, the client didn't tell the Docker Engine to publish any of the ports, so you need to re-run the docker run command to add this instruction.

    Let's re-run the command with some new flags to publish ports and pass your name to the container to customize the message displayed. We'll use the -d option again to run the container in detached mode.

    First, stop the container that you have just launched. In order to do this, we need the container ID.

    Since we ran the container in detached mode, we don't have to launch another terminal to do this. Run docker ps to view the running containers.

    $ docker ps\nCONTAINER ID        IMAGE                  COMMAND                  CREATED             STATUS              PORTS               NAMES\na7a0e504ca3e        dockersamples/static-site   \"/bin/sh -c 'cd /usr/\"   28 seconds ago      Up 26 seconds       80/tcp, 443/tcp     stupefied_mahavira\n

    Check out the CONTAINER ID column. You will need to use this CONTAINER ID value, a long sequence of characters, to identify the container you want to stop, and then to remove it. The example below provides the CONTAINER ID on our system; you should use the value that you see in your terminal.

    $ docker stop a7a0e504ca3e\n$ docker rm   a7a0e504ca3e\n

    Note: A cool feature is that you do not need to specify the entire CONTAINER ID. You can just specify a few starting characters and if it is unique among all the containers that you have launched, the Docker client will intelligently pick it up.

    Now, let's launch a container in detached mode as shown below:

    $ docker run --name static-site -e AUTHOR=\"Your Name\" -d -P dockersamples/static-site\ne61d12292d69556eabe2a44c16cbd54486b2527e2ce4f95438e504afb7b02810\n

    In the above command:

    • -d will create a container with the process detached from our terminal
    • -P will publish all the exposed container ports to random ports on the Docker host
    • -e is how you pass environment variables to the container
    • --name allows you to specify a container name
    • AUTHOR is the environment variable name and Your Name is the value that you can pass

    Now you can see the ports by running the docker port command.

    $ docker port static-site\n443/tcp -> 0.0.0.0:32772\n80/tcp -> 0.0.0.0:32773\n

    If you are on codespace, create a port forwarding on port 80 to connect to the website

    If you are running Docker for Mac, Docker for Windows, or Docker on Linux, you can open http://localhost:[YOUR_PORT_FOR 80/tcp]. For our example this is http://localhost:32773.

    If you are using Docker Machine on Mac or Windows, you can find the hostname on the command line using docker-machine as follows (assuming you are using the default machine).

    $ docker-machine ip default\n192.168.99.100\n
    You can now open http://<YOUR_IPADDRESS>:[YOUR_PORT_FOR 80/tcp] to see your site live! For our example, this is: http://192.168.99.100:32773.

    You can also run a second webserver at the same time, specifying a custom host port mapping to the container's webserver.

    $ docker run --name static-site-2 -e AUTHOR=\"Your Name\" -d -p 8888:80 dockersamples/static-site\n

    To deploy this on a real server you would just need to install Docker and run the above docker command (in this case you can see that the AUTHOR is the value you passed as an environment variable).

    Now that you've seen how to run a webserver inside a Docker container, how do you create your own Docker image? This is the question we'll explore in the next section.

    But first, let's stop and remove the containers since you won't be using them anymore.

    $ docker stop static-site\n$ docker rm static-site\n

    Let's use a shortcut to remove the second site:

    $ docker rm -f static-site-2\n

    Run docker ps to make sure the containers are gone.

    $ docker ps\nCONTAINER ID        IMAGE               COMMAND             CREATED             STATUS              PORTS               NAMES\n
    "},{"location":"1_4_docker_tp.html#22-docker-images","title":"2.2 Docker Images","text":"

    In this section, let's dive deeper into what Docker images are. You will build your own image, use that image to run an application locally.

    Docker images are the basis of containers. In the previous example, you pulled the dockersamples/static-site image from the registry and asked the Docker client to run a container based on that image. To see the list of images that are available locally on your system, run the docker images command.

    $ docker images\nREPOSITORY             TAG                 IMAGE ID            CREATED             SIZE\ndockersamples/static-site   latest              92a386b6e686        2 hours ago        190.5 MB\nnginx                  latest              af4b3d7d5401        3 hours ago        190.5 MB\npython                 2.7                 1c32174fd534        14 hours ago        676.8 MB\npostgres               9.4                 88d845ac7a88        14 hours ago        263.6 MB\ncontainous/traefik     latest              27b4e0c6b2fd        4 days ago          20.75 MB\nnode                   0.10                42426a5cba5f        6 days ago          633.7 MB\nredis                  latest              4f5f397d4b7c        7 days ago          177.5 MB\nmongo                  latest              467eb21035a8        7 days ago          309.7 MB\nalpine                 3.3                 70c557e50ed6        8 days ago          4.794 MB\njava                   7                   21f6ce84e43c        8 days ago          587.7 MB\n

    Above is a list of images that I've pulled from the registry and those I've created myself (we'll shortly see how). You will have a different list of images on your machine. The TAG refers to a particular snapshot of the image and the ID is the corresponding unique identifier for that image.

    For simplicity, you can think of an image akin to a git repository - images can be committed with changes and have multiple versions. When you do not provide a specific version number, the client defaults to latest.

    For example you could pull a specific version of ubuntu image as follows:

    $ docker pull ubuntu:12.04\n

    If you do not specify the version number of the image then, as mentioned, the Docker client will default to a version named latest.

    So for example, the docker pull command given below will pull an image named ubuntu:latest:

    $ docker pull ubuntu\n

    To get a new Docker image you can either get it from a registry (such as the Docker Store) or create your own. There are hundreds of thousands of images available on Docker Store. You can also search for images directly from the command line using docker search.

    An important distinction with regard to images is between base images and child images.

    • Base images are images that have no parent images, usually images with an OS like ubuntu, alpine or debian.

    • Child images are images that build on base images and add additional functionality.

    Another key concept is the idea of official images and user images. (Both of which can be base images or child images.)

    • Official images are Docker sanctioned images. Docker, Inc. sponsors a dedicated team that is responsible for reviewing and publishing all Official Repositories content. This team works in collaboration with upstream software maintainers, security experts, and the broader Docker community. These are not prefixed by an organization or user name. In the list of images above, the python, node, alpine and nginx images are official (base) images. To find out more about them, check out the Official Images Documentation.

    • User images are images created and shared by users like you. They build on base images and add additional functionality. Typically these are formatted as user/image-name. The user value in the image name is your Docker Store user or organization name.

    "},{"location":"1_4_docker_tp.html#23-create-your-first-image","title":"2.3 Create your first image","text":"

    Note: The code for this section is in this repository in the flask-app directory.

    Now that you have a better understanding of images, it's time to create your own. Our goal here is to create an image that sandboxes a small Flask application.

    The goal of this exercise is to create a Docker image which will run a Flask app.

    We'll do this by first pulling together the components for a random cat picture generator built with Python Flask, then dockerizing it by writing a Dockerfile. Finally, we'll build the image, and then run it.

    • Create a Python Flask app that displays random cat pix
    • Write a Dockerfile
    • Build the image
    • Run your image
    • Dockerfile commands summary
    "},{"location":"1_4_docker_tp.html#231-create-a-python-flask-app-that-displays-random-cat-pix","title":"2.3.1 Create a Python Flask app that displays random cat pix","text":"

    For the purposes of this workshop, we've created a fun little Python Flask app that displays a random cat .gif every time it is loaded - because, you know, who doesn't like cats?

    Start by creating a directory called flask-app where we'll create the following files:

    • app.py
    • requirements.txt
    • templates/index.html
    • Dockerfile

    Make sure to cd flask-app before you start creating the files, because you don't want to start adding a whole bunch of other random files to your image.

    "},{"location":"1_4_docker_tp.html#apppy","title":"app.py","text":"

    Create the app.py with the following content:

    from flask import Flask, render_template\nimport random\n\napp = Flask(__name__)\n\n# list of cat images\nimages = [\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif1.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif2.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif3.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif4.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif5.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif6.gif\",\n    ]\n\n@app.route('/')\ndef index():\n    url = random.choice(images)\n    return render_template('index.html', url=url)\n\nif __name__ == \"__main__\":\n    app.run(host=\"0.0.0.0\")\n
    "},{"location":"1_4_docker_tp.html#requirementstxt","title":"requirements.txt","text":"

    In order to install the Python modules required for our app, we need to create a file called requirements.txt and add the following line to that file:

    flask\ntyper\n
    "},{"location":"1_4_docker_tp.html#templatesindexhtml","title":"templates/index.html","text":"

    Create a directory called templates and create an index.html file in that directory with the following content in it:

    <html>\n  <head>\n    <style type=\"text/css\">\n      body {\n        background: black;\n        color: white;\n      }\n      div.container {\n        max-width: 500px;\n        margin: 100px auto;\n        border: 20px solid white;\n        padding: 10px;\n        text-align: center;\n      }\n      h4 {\n        text-transform: uppercase;\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"container\">\n      <h4>Cat Gif of the day</h4>\n      <img src=\"{{url}}\" />\n    </div>\n  </body>\n</html>\n
    "},{"location":"1_4_docker_tp.html#232-write-a-dockerfile","title":"2.3.2 Write a Dockerfile","text":"

    We want to create a Docker image with this web app. As mentioned above, all user images are based on a base image. Since our application is written in Python, we will build our own Python image based on Alpine. We'll do that using a Dockerfile.

    A Dockerfile is a text file that contains a list of commands that the Docker daemon calls while creating an image. The Dockerfile contains all the information that Docker needs to know to run the app \u2014 a base Docker image to run from, location of your project code, any dependencies it has, and what commands to run at start-up. It is a simple way to automate the image creation process. The best part is that the commands you write in a Dockerfile are almost identical to their equivalent Linux commands. This means you don't really have to learn new syntax to create your own Dockerfiles.

    1. Create a file called Dockerfile, and add content to it as described below.

    We'll start by specifying our base image, using the FROM keyword:

    FROM alpine:3.18\n

    Note : If you use the latest version of alpine which is 3.20, follow this tutorial to handle an error you might be getting

    2. The next step usually is to write the commands for copying the files and installing the dependencies. But first we will install the Python pip package on the alpine linux distribution. This will not just install the pip package but also its other dependencies, which include the python interpreter. Add the following RUN command next. Additionally, this is also where we would handle the newest python packaging rules if needed.
    RUN apk add --update py-pip\n
    3. Let's add the files that make up the Flask application.

    Install all Python requirements for our app to run. This will be accomplished by adding the lines:

    COPY requirements.txt /usr/src/app/\nRUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n

    Copy the files you have created earlier into our image by using COPY command.

    COPY app.py /usr/src/app/\nCOPY templates/index.html /usr/src/app/templates/\n
    4. Specify the port number which needs to be exposed. Since our flask app is running on port 5000, that's what we'll expose.
    EXPOSE 5000\n
    5. The last step is the command for running the application, which is simply python ./app.py. Use the CMD instruction to do that:
    CMD [\"python\", \"/usr/src/app/app.py\"]\n

    The primary purpose of CMD is to tell the container which command it should run by default when it is started.

    6. Verify your Dockerfile.

    Our Dockerfile is now ready. This is how it looks:

    # our base image\nFROM alpine:3.18\n\n# Install python and pip\nRUN apk add --update py-pip\n\n# install Python modules needed by the Python app\nCOPY requirements.txt /usr/src/app/\nRUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n\n# copy files required for the app to run\nCOPY app.py /usr/src/app/\nCOPY templates/index.html /usr/src/app/templates/\n\n# tell the port number the container should expose\nEXPOSE 5000\n\n# run the application\nCMD [\"python\", \"/usr/src/app/app.py\"]\n
    "},{"location":"1_4_docker_tp.html#233-build-the-image","title":"2.3.3 Build the image","text":"

    Now that you have your Dockerfile, you can build your image. The docker build command does the heavy-lifting of creating a docker image from a Dockerfile.

    The docker build command is quite simple - it takes an optional tag name with the -t flag, and the location of the directory containing the Dockerfile - the . indicates the current directory:

    docker build -t myfirstapp:1.0 .

    $ docker build -t myfirstapp:1.0 .\nSending build context to Docker daemon 9.728 kB\nStep 1 : FROM alpine:18\n ---> 0d81fc72e790\nStep 2 : RUN apk add --update py-pip\n ---> Running in 8abd4091b5f5\nfetch http://dl-4.alpinelinux.org/alpine/v3.3/main/x86_64/APKINDEX.tar.gz\nfetch http://dl-4.alpinelinux.org/alpine/v3.3/community/x86_64/APKINDEX.tar.gz\n(1/12) Installing libbz2 (1.0.6-r4)\n(2/12) Installing expat (2.1.0-r2)\n(3/12) Installing libffi (3.2.1-r2)\n(4/12) Installing gdbm (1.11-r1)\n(5/12) Installing ncurses-terminfo-base (6.0-r6)\n(6/12) Installing ncurses-terminfo (6.0-r6)\n(7/12) Installing ncurses-libs (6.0-r6)\n(8/12) Installing readline (6.3.008-r4)\n(9/12) Installing sqlite-libs (3.9.2-r0)\n(10/12) Installing python (2.7.11-r3)\n(11/12) Installing py-setuptools (18.8-r0)\n(12/12) Installing py-pip (7.1.2-r0)\nExecuting busybox-1.24.1-r7.trigger\nOK: 59 MiB in 23 packages\n ---> 976a232ac4ad\nRemoving intermediate container 8abd4091b5f5\nStep 3 : COPY requirements.txt /usr/src/app/\n ---> 65b4be05340c\nRemoving intermediate container 29ef53b58e0f\nStep 4 : RUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n ---> Running in a1f26ded28e7\nCollecting Flask==0.10.1 (from -r /usr/src/app/requirements.txt (line 1))\n  Downloading Flask-0.10.1.tar.gz (544kB)\nCollecting Werkzeug>=0.7 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading Werkzeug-0.11.4-py2.py3-none-any.whl (305kB)\nCollecting Jinja2>=2.4 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading Jinja2-2.8-py2.py3-none-any.whl (263kB)\nCollecting itsdangerous>=0.21 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading itsdangerous-0.24.tar.gz (46kB)\nCollecting MarkupSafe (from Jinja2>=2.4->Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading MarkupSafe-0.23.tar.gz\nInstalling collected packages: Werkzeug, MarkupSafe, Jinja2, itsdangerous, Flask\n  Running setup.py install for MarkupSafe\n  Running setup.py install for itsdangerous\n  Running setup.py install for Flask\nSuccessfully installed Flask-0.10.1 Jinja2-2.8 MarkupSafe-0.23 Werkzeug-0.11.4 itsdangerous-0.24\nYou are using pip version 7.1.2, however version 8.1.1 is available.\nYou should consider upgrading via the 'pip install --upgrade pip' command.\n ---> 8de73b0730c2\nRemoving intermediate container a1f26ded28e7\nStep 5 : COPY app.py /usr/src/app/\n ---> 6a3436fca83e\nRemoving intermediate container d51b81a8b698\nStep 6 : COPY templates/index.html /usr/src/app/templates/\n ---> 8098386bee99\nRemoving intermediate container b783d7646f83\nStep 7 : EXPOSE 5000\n ---> Running in 31401b7dea40\n ---> 5e9988d87da7\nRemoving intermediate container 31401b7dea40\nStep 8 : CMD python /usr/src/app/app.py\n ---> Running in 78e324d26576\n ---> 2f7357a0805d\nRemoving intermediate container 78e324d26576\nSuccessfully built 2f7357a0805d\n

    If you don't have the alpine:3.18 image, the client will first pull the image and then create your image. Therefore, your output on running the command will look different from mine. If everything went well, your image should be ready! Run docker images and see if your image (myfirstapp:1.0) shows up.

    "},{"location":"1_4_docker_tp.html#234-run-your-image","title":"2.3.4 Run your image","text":"

    The next step in this section is to run the image and see if it actually works.

    $ docker run -p 8888:5000 --name myfirstapp myfirstapp:1.0\n * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)\n

    Head over to http://localhost:8888 and your app should be live. Note If you are using Docker Machine, you may need to open up another terminal and determine the container ip address using docker-machine ip default.

    Hit the Refresh button in the web browser to see a few more cat images.

    "},{"location":"1_4_docker_tp.html#235-dockerfile-commands-summary","title":"2.3.5 Dockerfile commands summary","text":"

    Here's a quick summary of the few basic commands we used in our Dockerfile.

    • FROM starts the Dockerfile. It is a requirement that the Dockerfile must start with the FROM command. Images are created in layers, which means you can use another image as the base image for your own. The FROM command defines your base layer. As arguments, it takes the name of the image. Optionally, you can add the Docker Cloud username of the maintainer and image version, in the format username/imagename:version.

    • RUN is used to build up the Image you're creating. For each RUN command, Docker will run the command then create a new layer of the image. This way you can roll back your image to previous states easily. The syntax for a RUN instruction is to place the full text of the shell command after the RUN (e.g., RUN mkdir /user/local/foo). This will automatically run in a /bin/sh shell. You can define a different shell like this: RUN /bin/bash -c 'mkdir /user/local/foo'

    • COPY copies local files into the container.

    • CMD defines the command that will run on the Image at start-up. Unlike a RUN, this does not create a new layer for the Image, but simply runs the command. There can only be one CMD per Dockerfile/Image. If you need to run multiple commands, the best way to do that is to have the CMD run a script. Unlike RUN, CMD requires that you tell it where to find the command to run. So example CMD commands would be:

      CMD [\"python\", \"./app.py\"]\n\n  CMD [\"/bin/bash\", \"echo\", \"Hello World\"]\n
    • EXPOSE creates a hint for users of an image which ports provide services. It is included in the information which can be retrieved via $ docker inspect <container-id>.

    Note: The EXPOSE command does not actually make any ports accessible to the host! Instead, this requires publishing ports by means of the -p flag when using $ docker run.

    • docker push (a CLI command, not a Dockerfile instruction) pushes your image to Docker Hub, or alternatively to a private registry

    Note: If you want to learn more about Dockerfiles, check out Best practices for writing Dockerfiles.

    "},{"location":"1_4_docker_tp.html#3-running-cli-apps-packaged-in-docker-while-mounting-volumes","title":"3. Running CLI apps packaged in docker while mounting volumes","text":"

    Beyond serving web applications, Docker also enables the deployment of packaged applications, such as command-line interfaces and training scripts. This allows for seamless delivery of self-contained apps with bespoke installations to end-users. A particularly valuable use case is packaging machine learning environments for distributed training, facilitating efficient collaboration and scalability

    To do so, we have to learn about:

    • Executing command line applications packaged inside docker images
    • Passing both text and file inputs, including files not in the docker image
    • Getting access to file outputs such as models

    For that we will do several things:

    • Write a CLI application using typer, a very useful tool for the rest of your career
    • Package the CLI application, with both text and file inputs, in a docker image
    • Mount volumes when running a docker image to provide it with the input files, and get access to the results from the host machine

    "},{"location":"1_4_docker_tp.html#31-a-local-cli-application","title":"3.1 A local CLI application","text":"
    • Let's modify the app.py in 2. with the following code.
    import time\nfrom pathlib import Path\nfrom typing import Annotated, Optional\n\nimport typer\n\napp = typer.Typer()\n\n\n@app.command()\ndef say_hello(name: str):\n    typer.echo(f\"Hello {name}\")\n\n\n@app.command()\ndef run_training(\n    config: Annotated[\n        Path,\n        typer.Option(\n            exists=True,\n            file_okay=True,\n            dir_okay=False,\n            writable=False,\n            readable=True,\n            resolve_path=True,\n        ),\n    ],\n    output_dir: Annotated[\n        Path,\n        typer.Option(\n            dir_okay=True,\n            writable=True,\n            readable=True,\n            resolve_path=True,\n            file_okay=False,\n        ),\n    ],\n):\n    text = config.read_text()\n    print(f\"Config file contents: {text}\")\n\n    print(f\"Running training in {output_dir}...\")\n\n    time.sleep(10)\n\n    output_dir.mkdir(exist_ok=True,parents=True)\n\n    with open(output_dir / \"results.txt\", \"w\") as f:\n        f.write(\"Training successful !\")\n\n\nif __name__ == \"__main__\":\n    app()\n
    • Test the application locally: pip install typer, then python app.py say-hello {my name} or python app.py run-training --config {my config} --output-dir {somewhere}
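
    For instance, with made-up example values (the name, config content and paths below are purely illustrative):

    pip install typer\npython app.py say-hello Alice\nmkdir -p config && echo \"dummy config\" > config/config.txt\npython app.py run-training --config config/config.txt --output-dir outputs\n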
    "},{"location":"1_4_docker_tp.html#32-packaging-it-in-a-dockerfile","title":"3.2 Packaging it in a dockerfile","text":"

    We will now package it in a docker file

    • Modify the dockerfile :
    • Replace CMD [\"python3\", \"/usr/src/app/app.py\"]
    • By ENTRYPOINT [\"python3\", \"/usr/src/app/app.py\"]

    • Differences between CMD and ENTRYPOINT

    • Rebuild your docker image (maybe give it another name)
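
    For example, the rebuild and a first smoke test could look like this (the image name myfirstcli is just an arbitrary example; with the ENTRYPOINT set, any arguments after the image name are passed to the typer app):

    docker build -t myfirstcli:1.0 .\ndocker run --rm myfirstcli:1.0 --help\n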

    "},{"location":"1_4_docker_tp.html#33-mounting-volumes","title":"3.3 Mounting volumes","text":"
    • Now to run the CLI you just have to pass the arguments when running the container: docker run --rm {your image} {your args}. Try it with docker run --rm {your image} say-hello {your name}

    Warning

    Once you have built your image and it works, don't rebuild it again! We will test the volume mounting options now

    • In order to pass a config file or data to your container, you need to make it available inside the container. To do that, we have to mount volumes

    Create a dummy config file (config.txt) in another folder (ex: config/) then mount it when you run the docker container. You can expose the output directory as well to be able to get your results

    docker run --rm \\\n  -v {local path to your configs}:/home/configs \\\n  -v {local path to your outputs}:/home/outputs \\\n  --workdir /home/ \\\n  {your image} \\\n  run-training --config {path to your config in DOCKER, eg /home/configs/config.txt}  \\\n  --output-dir /home/outputs/\n

    Note that since you mounted volumes, you must pass the path to your config file as it appears inside the docker container (not its path in your codespace) for it to work
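
    As a concrete illustration (assuming a config/ and an outputs/ folder in your current directory and an image named myfirstcli:1.0; all of these are example names to adapt):

    mkdir -p config outputs\necho \"dummy config\" > config/config.txt\ndocker run --rm \\\n  -v $(pwd)/config:/home/configs \\\n  -v $(pwd)/outputs:/home/outputs \\\n  --workdir /home/ \\\n  myfirstcli:1.0 \\\n  run-training --config /home/configs/config.txt --output-dir /home/outputs/\ncat outputs/results.txt    # the results written inside the container are visible from the codespace\n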

    Success

    To be successful here you have to be able to pass a config file that is in your codespace and get the results in your codespace, all while not rebuilding the image as long as the first hello passes

    "},{"location":"1_4_docker_tp.html#4-containers-registry","title":"4. Containers Registry","text":"

    Remember Container Registries? Here are some explainers

    The main container registry is Docker Hub, https://hub.docker.com/

    All docker engines that have access to the internet have access to this main hub, and this is where we pulled our base images from before

    Example, the Python Image

    Google Cloud has an Artifact Registry per project, which ensures the docker images you build are accessible only to the people who have access to your project.

    We will follow this tutorial to push our images to artifact registry

    • First, create a Docker Artifact registry using this tutorial, example fch-sdd2425-artifacts-registry (that's mine, name it with your name). Set the repository in multi-region/europe

    • Pushing our images requires authenticating, gcloud auth configure-docker europe-docker.pkg.dev

    • Pushing our images requires tagging them in a specific way : europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/${IMAGE}:${TAG}

    • Use the docker cli to tag your previous myfirstapp image to the right namespace

    docker tag myfirstapp:1.0 europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0

    • Upload it on container registry

    docker push europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/[IMAGE]:[TAG]

    Hint

    To get your project id: PROJECT_ID=$(gcloud config get-value project 2> /dev/null). To get your artifact repository id, look at this page (you can get your project id this way as well)
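
    Putting it all together, the tag-and-push sequence could look like this (assuming the example repository name fch-sdd2425-artifacts-registry used above; replace it with yours):

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\nREPO_ID=fch-sdd2425-artifacts-registry\ndocker tag myfirstapp:1.0 europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0\ndocker push europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0\n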

    • Go to your artifact registry https://console.cloud.google.com/artifacts, you should see your docker image :)
    "},{"location":"1_4_docker_tp.html#5-bonus-data-science-standardized-environment-and-mounting-volumes","title":"5. Bonus. Data Science Standardized Environment and mounting volumes","text":"

    Note : This may not run in your native github codespace due to the storage available. If you encounter a storage error, run docker system prune to cleanup everything

    The purpose of this tutorial is to reproduce a sort of google colab environment using docker and github codespace.

    "},{"location":"1_4_docker_tp.html#51-intro","title":"5.1 Intro","text":"

    Those of us who work on a team know how hard it is to create a standardized development environment. And if you have ever updated a dependency and had everything break, you understand the importance of keeping development environments isolated.

    Using Docker, we can create a project / team image with our development environment and mount a volume with our notebooks and data.

    The benefits of this workflow are that we can:

    • Separate out projects
    • Spin up a container to onboard new employees
    • Build an automated testing pipeline to confirm upgrade dependencies do not break code
    "},{"location":"1_4_docker_tp.html#52-jupyter-stack-docker-image","title":"5.2 Jupyter Stack Docker Image","text":"

    For this exercise we will use a Jupyter Docker Stacks image, which is a fully configured docker image that can be used as a data science container

    Take a look at the documentation and the dockerhub repository

    To get the docker image, run

    docker pull jupyter/scipy-notebook:lab-3.5.3\n
    "},{"location":"1_4_docker_tp.html#53-get-the-algorithm-in-ml-git-in-your-virtual-machine","title":"5.3 Get the algorithm in ML git in your Virtual Machine","text":"
    • From your VM, run git clone https://github.com/erachelson/MLclass.git; this should set up your AML class material inside your VM
    "},{"location":"1_4_docker_tp.html#54-mounting-volumes-and-ports","title":"5.4 Mounting volumes and ports","text":"

    Now let's run the image. This container has a jupyter notebook accessible on port 8888, so we will need to map the host port 8888 (the one accessible from the ssh tunnel) to the container port 8888; we will use port forwarding

    We will also need to make the notebooks on the VM available to the container... we will mount volumes. Your data is located in /home/${USER}/MLclass and we want to mount it in /home/jovyan/work/MLClass (as in the command below)

    docker run --rm -it \\\n  -p 8888:8888 \\\n  -v /home/${USER}/MLclass:/home/jovyan/work/MLClass \\\n  --workdir /home/jovyan/work \\\n  jupyter/scipy-notebook:lab-3.5.3\n

    Note: this image is large, delete it afterwards using docker rmi

    Options breakdown:

    • --rm remove the container when we stop it
    • -it run the container in interactive mode
    • -p forward port from host:container
    • -v mounts a volume (host path:container path)
    • --workdir sets the working directory inside the container

    You should now see a jupyter lab with MLclass accessible, provided you forward port 8888 from the codespace as before

    So, to connect to the jupyter lab, we mapped local port 8888 to VM port 8888, and VM port 8888 to container port 8888

    We also exposed the local disk to the container

    "},{"location":"1_4_docker_tp.html#6-bonus-docker-compose","title":"6. Bonus - Docker Compose","text":"

    Docker Compose is used to manage applications and increase efficiency in container development. Configurations are defined in a single YAML file, making applications easy to build and scale. Docker Compose is often used to set up a local environment

    The tutorial below aims to introduce fundamental concepts of Docker Compose by guiding you through the development of a basic Python web application.

    Using the Flask framework, the application features a hit counter in Redis, providing a practical example of how Docker Compose can be applied in web development scenarios.

    The concepts demonstrated here should be understandable even if you're not familiar with Python.

    This is a non-normative example that just highlights the key things you can do with Compose.

    https://docs.docker.com/compose/gettingstarted/

    You can find a more extensive example here :

    https://hackernoon.com/practical-introduction-to-docker-compose-d34e79c4c2b6

    https://github.com/docker/labs/blob/master/beginner/chapters/votingapp.md

    "},{"location":"1_4_docker_tp.html#7-bonus-using-google-cloud-tools-for-docker","title":"7. Bonus - Using Google Cloud Tools for Docker","text":"

Using your codespace, you should be able to redo the Hello World Dockerfile exercise, except that instead of running docker build locally you use Google Cloud Build

    Tutorial: https://cloud.google.com/cloud-build/docs/quickstart-docker

Example command: gcloud builds submit --tag eu.gcr.io/$PROJECT_ID/{image}:{tag} .

    Help

    to get your project id: PROJECT_ID=$(gcloud config get-value project 2> /dev/null)

    Example

    Try to build the hello world app
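For instance, a hedged sketch (it assumes the Dockerfile from the Hello World exercise is in the current directory, and hello-world / 1.0 are placeholder image name and tag):

PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ngcloud builds submit --tag eu.gcr.io/${PROJECT_ID}/hello-world:1.0 .\n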

    "},{"location":"1_4_docker_tp.html#8-bonus-going-further","title":"8. Bonus - Going further","text":"

    https://container.training/

    "},{"location":"1_5_be.html","title":"Bureau d'\u00e9tudes Cloud & Docker","text":"

    Link to slides

    "},{"location":"1_5_be.html#objectives-of-this-be","title":"Objectives of this BE","text":"

This Bureau d'\u00e9tudes (BE, for short) will guide you through the essential notions you need in order to work with cloud computing and docker,

We will go through several steps:

    • Working in a remote environment (in a GitHub CodeSpace, inside a VM)
    • Creation and ssh connection to virtual machine instances
    • Using managed storage capabilities (gcs://)
    • Creating your own docker images
    • Exchanging docker images through a Container Registry
    • Pulling and running docker images created by your teammates

In particular, we will follow this workflow:

    Warning

    Please read all the text in the question before executing the step-by-step instructions because there might be help or indications after the instructions.

    "},{"location":"1_5_be.html#how-to-run-this-be","title":"How to run this BE","text":"

The best way to run this BE is to set up a Github Codespace VM and install the google cloud sdk. Refer to the previous hands-on to learn more

    We will be using the gcloud CLI for the following:

    • Create a GCE Virtual Machine
    • Connect to SSH with port forwarding to said machine

For the rest of this walkthrough, when an instruction says \"from your local machine\", it means your github codespace

    If it is specified \"inside the VM\", this means that you should run it inside the GCE VM, which means you need to connect to it using an SSH tunnel first...

    \ud83d\ude4f\ud83c\udffb Use Google Chrome without any ad blockers if you have any issues, or use the local VSCode + CodeSpace extension

    Warning

    \u26a0\ufe0f Normally you will do everything from your browser, connected to the github codespace, so it should work \u26a0\ufe0f if you have any issues, switch your wi-fi connection between eduroam (preferred), isae-edu or a 4G hotspot

    "},{"location":"1_5_be.html#team-composition-setup","title":"Team composition & Setup","text":"

You should be in teams of 5; however, this will work with a minimum of 2 people.

    Each team member picks a different cute mascot and remembers it:

    • \ud83d\udc08 cat
    • \ud83d\udc15 dog
    • \ud83d\udc7d (baby) yoda
    • \ud83e\udd89 owl
    • \ud83d\udc3c panda

Pick a group name, because you will need it for the next steps

One of the team members will add the others to their GCP project so that everyone can collaborate.

    Designate a \"project manager\" (the person who is the most comfortable with the google cloud platform UI). That person will have the hard task of giving access to his/her GCP project to the other team members to enable collaboration.

    This means that the project of the \"team leader\" will be billed a little more for the duration of this BE, so please be kind with the project and apply good cloud hygiene :)

    Rest assured, this will not cost very much !

    How to do that ?

    Go to the \"IAM & Admin / IAM\" section of the Google Cloud Console, then locate the \"grant access\",

    Grant access to your each of your teammates using the \"Editor Role\" (Basic -> Editor)

    Here are some screenshots to help you

    "},{"location":"1_5_be.html#1-build-ship-run-deploy-as-a-team","title":"1 - Build, Ship, Run (Deploy) as a Team","text":""},{"location":"1_5_be.html#11-build","title":"1.1 - Build","text":""},{"location":"1_5_be.html#111-start-development-environment-github-codespace","title":"1.1.1 - Start Development Environment (Github Codespace)","text":"
    • Launch your Github Codespaces instance from the preconfigured repository https://github.com/fchouteau/isae-cloud-computing-codespace
    • Ensure that the google cloud sdk is installed (it should be done automatically) and configured to the project that you were given access to (run gcloud init like last time)
    "},{"location":"1_5_be.html#112-get-the-necessary-resources-from-google-cloud-storage","title":"1.1.2 - Get the necessary resources from Google Cloud Storage","text":"

    From your github codespace,

    The resources are located at the URI gs://fchouteau-isae-cloud/be/${MASCOT},

    Your ${MASCOT} name is either:

    • cat
    • dog
    • owl
    • panda
    • yoda

    I advise you to export MASCOT=.... to remember it :)

    ONLY DOWNLOAD your mascot resources (no cheating ! this will only cause confusion later)

    Download them to your instance using the gcloud cli (refer to your previous work for more information)

    Hint

    gsutil -m cp -r {source} {destination}\n
    Remember that google storage URIs always begin with gs://
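Putting it together, a minimal sketch (assuming your mascot is yoda; adapt it to yours):

export MASCOT=yoda  # replace with your own mascot\ngsutil -m cp -r gs://fchouteau-isae-cloud/be/${MASCOT} .\n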

    Go to (cd) the folder where you downloaded your resources

    You should see a file structure like this

    fchouteau@be-cloud-mascot:~/be$ tree yoda  -L 2\nyoda\n\u251c\u2500\u2500 app.py\n\u251c\u2500\u2500 AUTHOR.txt\n\u251c\u2500\u2500 Dockerfile\n\u251c\u2500\u2500 favicon.ico\n\u251c\u2500\u2500 imgs\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 1.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 2.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 3.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 4.gif\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 5.gif\n\u2514\u2500\u2500 template.html.jinja2\n\n1 directory, 10 files\n
    "},{"location":"1_5_be.html#113-build-your-docker-image","title":"1.1.3 - Build your docker image","text":"

    Question

    • Look at the Dockerfile (cat Dockerfile), what does it seem to do ?
    • Look at app.py (cat app.py). What is Flask ? What does it seem to do ?
    • Edit the file AUTHOR.txt to add your name instead of the placeholder
    • Refer to your previous work to build the image

    Danger

    On which port is your flask app running ? (cat Dockerfile) Note it carefully ! You will need to communicate it to your teammate :)

    • When building the image, name it appropriately... like eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0 !
    Hint

    to get your project id:

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\n
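If you need a starting point for the build itself, here is a hedged sketch (run it from the folder containing the Dockerfile; the group and mascot values are placeholders to replace):

export GROUPNAME=mygroup  # placeholder: your group name\nexport MASCOT=yoda        # placeholder: your mascot\ndocker build -t eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0 .\n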

    • now if you list your images you should see it !
    REPOSITORY                                      TAG                 IMAGE ID            CREATED             SIZE\neu.gcr.io/{your project name}/{your-app}    1.0                 d1c5993848bf        2 minutes ago       62.1MB\n

    Question

    Describe concisely to your past self what is a Docker Image

    "},{"location":"1_5_be.html#12-ship","title":"1.2 - Ship","text":""},{"location":"1_5_be.html#121-push-your-docker-image-in-the-shared-container-registry","title":"1.2.1 - Push your Docker image in the shared Container Registry","text":"
• One of the team members must first create a shared Artifact Registry

• Now push your image to the shared container registry (see the sketch after this list)

    • Help your team mates so that everybody can build his/her Docker Image
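A hedged sketch of the push step (it assumes the image name from the build step and that docker is authenticated against the registry):

gcloud auth configure-docker  # authenticate docker against the Google registry\ndocker push eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0\n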

    Question

Describe succinctly to your past self what is a Container Registry

    "},{"location":"1_5_be.html#13-run-deploy","title":"1.3 - Run (deploy)","text":""},{"location":"1_5_be.html#131-create-google-compute-engine-vm","title":"1.3.1 - Create Google Compute Engine VM","text":"

    Each team member creates a separate GCE Instance (Virtual Machine) on the same project,

    Here, you will create a Google Compute Engine instance, preconfigured with everything you need,

    If you use the google cloud CLI (from your codespace), you can use this

    First, set a variable with the name of your instance,

    export INSTANCE_NAME=\"be-cloud-mascot-{yourgroup}-{yourname}\" # Don't forget to replace values !\n

    Then create your VM

    gcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=\"TERMINATE\" \\\n        --scopes=\"storage-rw\" \\\n        --machine-type=\"n1-standard-1\" \\\n        --boot-disk-size=\"50GB\" \\\n        --boot-disk-type=\"pd-standard\"\n

    If you have an issue with quota, use any of europe-west4-{a,b,c,d} or europe-west1-{b,c,d}

    If you use the web interface, follow this

    Your browser does not support the video tag.

    Question

    Describe concisely to your past self what is a Virtual Machine and what is Google Compute Engine

    "},{"location":"1_5_be.html#132-connect-using-ssh-to-the-instance","title":"1.3.2 - Connect using SSH to the instance","text":"

    If you are using the google cloud sdk from github codespace, you can connect to ssh using the usual command.

    Tunnel the following ports to your local machine:

• 8080: This port is reserved for a jupyter lab session by default; it makes it easy to view & edit text
• 8081: You will need to run containers and expose them on a port
    Hint
    gcloud compute ssh {user}@{instance} -- \\\n    -L {client-port}:localhost:{server-port} \\\n    -L {client-port-2}:localhost:{server-port-2}\n

    Go to your browser and connect to http://localhost:8080, you should be in a jupyter lab where you can access a terminal, a text editor etc...

    Question

    Where is this jupyter lab hosted ? Describe concisely what is a SSH Tunnel and what is port forwarding

    "},{"location":"1_5_be.html#133-pull-docker-images-from-your-teammate","title":"1.3.3 - Pull Docker Images from your teammate","text":"

You should now be inside your VM,

    Question

How can you check that you're inside your VM ? Your terminal prompt should start with user@hostname, where hostname is the name of your VM

• Select another mascot and pull the corresponding docker image from the registry (a sketch is given after this list)

• List the docker images you have with docker images.
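A sketch of what this could look like (the cat tag below is a hypothetical placeholder; use the exact name your teammate pushed):

docker pull eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-cat-1.0  # hypothetical tag\ndocker images\n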

    "},{"location":"1_5_be.html#134-run-docker-containers-from-their-docker-images","title":"1.3.4 - Run Docker Containers from their Docker Images","text":"
• Run your container while mapping the correct port to your VM port 8081. Which port is it ? Well, ask the one who built the image.

• When running the container, set the USER environment variable to your name ! (see the sketch after the hint below)

    Hint

the port is not necessarily the same as yours; and if you don't set the username, it will come back to bite you later ;)
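A minimal sketch of the run step (the container port 5000 below is purely hypothetical: use the port your teammate read in their Dockerfile, and replace the tag):

docker run --rm -it \\\n    -p 8081:5000 \\\n    -e USER=yourname \\\n    eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-cat-1.0  # hypothetical port and tag\n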

    "},{"location":"1_5_be.html#135-display-the-results-share-them","title":"1.3.5 - Display the results & share them","text":"
    • You just launched a webapp on the port 8081 of your remote instance.

• If you have an ssh tunnel directly from your laptop, ensure that you created a tunnel from your port 8081 to any port of your machine, then go to http://localhost:(your port) in your browser. The resulting webpage should appear

    • If you are using github codespace, open web preview on port 8081 (you should have a tunnel running between your github codespace and your GCE instance)

    • You can also publicly share the codespace preview link so that other people can see your results

    Checklist

• The webpage should display the mascot you chose to run
    • The webpage should display the name of the author (not you)
    • The webpage should display your name

    Bug

If any of the three items above is missing, find the bug and solve it :)

    Example

    Try to refresh the webpage to make more gifs appear

    Share your result on slack

    "},{"location":"1_5_be.html#14-cleanup-the-gcp-project","title":"1.4. Cleanup the GCP project","text":"
    • Remove your VMs (DELETE them)
    • Remove images from the container registry
    "},{"location":"1_5_be.html#15-yay","title":"1.5. Yay !","text":"

    Success

\ud83c\udf89 you have successfully finished the mandatory part of the BE. You now know how to handle the basic notions around cloud computing and docker, so you won't be completely lost when someone talks about it

    Continue the BE below (you can do it alone or by group of 2 or 3) to discover more nice things !

    "},{"location":"1_5_be.html#2-another-deployment","title":"2 - Another deployment","text":""},{"location":"1_5_be.html#21-lets-discover-streamlit","title":"2.1 - Let's discover Streamlit","text":"

    We will now introduce streamlit, which is a very nice tool to build quick webapps in python !

    In this TP you will build your first interactive webapp in python and package it in a container.

    First, look at this video,

    Your browser does not support the video tag.

    Then, take a look at an introduction to streamlit and the streamlit application gallery

    Question

    Can you describe what exactly is streamlit ? Could you find any way it could be useful to you ?

    "},{"location":"1_5_be.html#22-your-first-streamlit-application","title":"2.2 Your first streamlit application","text":"

    Take a look at the code below,

import streamlit as st\nfrom streamlit_image_comparison import image_comparison\nimport cv2\n\nst.set_page_config(\"Webb Space Telescope vs Hubble Telescope\", \"\ud83d\udd2d\")\n\nst.header(\"\ud83d\udd2d J. Webb Space Telescope vs Hubble Telescope\")\n\nst.write(\"\")\n\"This is a reproduction of the fantastic [WebbCompare](https://www.webbcompare.com/index.html) app by [John Christensen](https://twitter.com/JohnnyC1423). It's built in Streamlit and takes only 10 lines of Python code. If you like this app, please star [John's original repo](https://github.com/JohnEdChristensen/WebbCompare)!\"\nst.write(\"\")\n\nst.markdown(\"### Southern Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/southern_nebula_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/southern_nebula_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\n\nst.markdown(\"### Galaxy Cluster SMACS 0723\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/deep_field_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/deep_field_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Carina Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/carina_2800.png\",\n    img2=\"https://www.webbcompare.com/img/webb/carina_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Stephan's Quintet\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/stephans_quintet_2800.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/stephans_quintet_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n

    Question

    Can you describe, by reading the documentation, what does the code do ?

    "},{"location":"1_5_be.html#23-local-deployment-in-codespace","title":"2.3 - Local deployment in codespace","text":"

First, we will install the dependencies for our application in the codespace,

pip install streamlit opencv-python-headless streamlit-image-comparison

    Then create a file streamlit_jswt.py and copy/paste the code above.

Then execute it: streamlit run streamlit_jswt.py

This will launch the application on port 8501 (by default) of our codespace. You can connect to it as usual.

    \ud83e\udd29 Nice, isn't it ?

    Now you can quit the server.

    "},{"location":"1_5_be.html#24-a-more-complex-application","title":"2.4 - A more complex application","text":"

We will run and package a more complex application, one that is a lot more useful for your deep learning class

    Clone the following repository git clone https://github.com/fchouteau/isae-demo-streamlit-activation-functions.git

    cd to the directory cd isae-demo-streamlit-activation-functions then as last time, install the dependencies pip install -r requirements.txt then run the application streamlit run app.py

    You can visualize it as last time. This should be quite useful for you given you just left the Deep Learning Class !

    "},{"location":"1_5_be.html#25-transform-application-into-docker-image","title":"2.5 - Transform application into docker image","text":"

    Refer to the previous TP where we built a website to convert what we just did into a docker image.

In short, create a Dockerfile that inherits from FROM python:3.10, copy all the app files COPY ./ /app/, install the dependencies RUN pip install -r /app/requirements.txt, expose the port EXPOSE 8501, then run the app as the entrypoint CMD [\"python\", \"-m\", \"streamlit\", \"run\", \"app.py\"].

    You should be able to do it yourself, but if you need help, here's what your Dockerfile looks like :

    Solution
      FROM python:3.10\n\n  COPY ./ /app/\n  RUN pip install -r /app/requirements.txt\n\n  EXPOSE 8501\n\n  WORKDIR /app/\n\n  CMD [\"python\", \"-m\", \"streamlit\", \"run\", \"app.py\"]\n

    Then build your image, and run it locally (using the correct port forwarding which is 8501)

    Solution
      # build\n  docker build -t eu.gcr.io/sdd2324/streamlit-fch:1.0 -f Dockerfile . \n  # run\n  docker run --rm -p 8501:8501 eu.gcr.io/sdd2324/streamlit-fch:1.0 # change this name to yours\n

    Once you know it works locally, tag it and push it to our shared container registry

    Solution
      # push to registry\n  docker push eu.gcr.io/sdd2324/streamlit-fch:1.0 # change this name to yours\n
    "},{"location":"1_5_be.html#26-deployment-in-a-vm","title":"2.6 - Deployment in a VM","text":"

We will now create yet another VM to deploy our application. This time, we will deploy our container directly to a VM without connecting to it over SSH,

    Don't forget to change the instance name & zone according to what you did previously.

Take note of the --container-image flag and change it to the name of the image you just pushed

    gcloud compute instances create-with-container fch-streamlit-demo \\\n    --project=[your project] \\\n    --zone=europe-west1-b \\\n    --machine-type=n1-standard-1 \\\n    --image=projects/cos-cloud/global/images/cos-stable-109-17800-66-27 \\\n    --boot-disk-size=10GB \\\n    --boot-disk-type=pd-standard \\\n    --container-image=[your image] \\\n    --container-restart-policy=always\n

Compared to before, note that we explicitly specify a container to deploy on the VM, and that we use a container-optimized OS rather than ubuntu.

    "},{"location":"1_5_be.html#27-publish-the-results-on-the-web","title":"2.7 - Publish the results on the web","text":"

    First run this command in your codespace. This will expose the port 8501 to the web

    gcloud compute --project=[your project] firewall-rules create open-8501 --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8501 --source-ranges=0.0.0.0/0\n
    Then, locate the public IP of your VM using the google cloud console.
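If you prefer the CLI, a hedged sketch (assuming the instance name and zone used in the command above):

gcloud compute instances describe fch-streamlit-demo \\\n    --zone=europe-west1-b \\\n    --format=\"get(networkInterfaces[0].accessConfigs[0].natIP)\"\n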

    Finally, take your phone (it won't work over ISAE wifi, maybe on eduroam) and connect to its port 8501, http://ip-of-the-machine:8501

    \ud83e\uddd0 The app should appear !

    We just deployed a webapp written in python to a public website :)

    "},{"location":"1_5_be.html#28-cleanup","title":"2.8 - Cleanup","text":"

As usual, clean up your resources. Delete the GCE VM.
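For instance, a sketch assuming the instance name and zone used above:

gcloud compute instances delete fch-streamlit-demo --zone=europe-west1-b\n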

    "},{"location":"1_5_be.html#29-yay","title":"2.9 - Yay !","text":"

    Success

\ud83c\udf7e you have successfully finished all parts of the BE. You now know how to handle the basic notions around cloud computing and docker, so you won't be completely lost when someone talks about it

    Finish the previous hands-on (cloud & docker) if you have time. In particular, take a look at the docker-compose section.

    "},{"location":"1_5_be.html#3-im-finished-now-im-bored","title":"3 - I'm finished, now I'm bored !","text":"

    I advise you to ensure you've done this part of the previous GCP hands-on

    "},{"location":"1_6_conclusion.html","title":"Recap'","text":"

    Link to slides

    "},{"location":"1_7_readings.html","title":"Readings","text":""},{"location":"1_7_readings.html#about-cloud-computing","title":"About Cloud Computing","text":"
    • Buyya, R., Srirama, S. N., Casale, G., Calheiros, R., Simmhan, Y., Varghese, B., ... & Toosi, A. N. (2018). A manifesto for future generation cloud computing: Research directions for the next decade. ACM computing surveys (CSUR), 51(5), 1-38.

    • On sustainable data centers and energy use (intro)

    • The NIST Definitions of Cloud Computing

    • Open Data: Open Sentinel 2 archive on AWS

    • Environmental Impact of Cloud vs On Premise

    • Environmental Impact of cloud vs on-premise medium blog post

    • Paper from Natural Resources Defense Council on Cloud vs On-Premise

    • Anecdotes about Cloud Computing

    "},{"location":"1_7_readings.html#about-containers","title":"About Containers","text":"
    • Docker whitepaper: Docker and the way of the Devops

    • What exactly is Docker ? Simple explanation from a medium blog post

    "},{"location":"1_7_readings.html#about-orchestration","title":"About Orchestration","text":"
    • Verma, A., Pedrosa, L., Korupolu, M., Oppenheimer, D., Tune, E., & Wilkes, J. (2015, April). Large-scale cluster management at Google with Borg. In Proceedings of the Tenth European Conference on Computer Systems (pp. 1-17).

    • Kubernetes Comic to learn about Kubernetes in a fun way https://cloud.google.com/kubernetes-engine/kubernetes-comic

    "},{"location":"1_8_deployment.html","title":"Intro to Deployment & BE","text":"

    Link to slides

    "},{"location":"1_8_deployment_tp.html","title":"Deploy your ML model into production","text":""},{"location":"1_8_deployment_tp.html#objectifs","title":"Objectifs","text":"

The goal of this TP is to convert this notebook into two containerized services: a back-end, a server that receives images and returns predictions, and a front-end that lets you send images to the model and display the predictions on those images,

To save time, the Dockerfiles have already been written and the images are ready to be tested and deployed. If you want to dig into the details and write the code yourself, you can check the long version of this TP (which is not up to date).

We will therefore go through: - Building a \"backend\" docker image that serves the model behind an \"API\" - Interacting with this docker container - Building a \"frontend\" docker image that contains a UI to interact more easily with the backend - docker-compose to launch multi-container applications - Deploying the backend on GCP - The final test

We put ourselves in a \"microservices\" context where the front-end and the back-end are 2 different containers. It would have been possible to build a single container holding both (a \"monolith\"). A microservices architecture can have some advantages (modularity, maintainability) but is more complex to put in place.

    "},{"location":"1_8_deployment_tp.html#1-mise-en-place-du-projet-google-cloud-platform","title":"1 - Mise en place du projet Google Cloud Platform","text":"

Now that you have your credits, follow the instructions of the 1st Google Cloud Platform TP to create your own GCP project

    "},{"location":"1_8_deployment_tp.html#2-demarrage-du-code-space","title":"2 - D\u00e9marrage du Code Space","text":"

Start a github codespace from the repository https://github.com/fchouteau/isae-cloud-computing-codespace

You must use a codespace created from this repository because it contains everything you need for this TP.

Normally, once the codespace has started, you should get a vscode interface with two folders, one of which is named tp-deployment. Go to this folder,

It contains several resources: the frontend folder, which has everything needed to build the UI, the backend folder, which has everything needed to build the server, and some test resources.

    "},{"location":"1_8_deployment_tp.html#3-construction-et-tests-du-backend","title":"3 - Construction et tests du backend","text":"

The README.md in the backend folder gives details about how the server and its API are built (this used to be left as an exercise). We use FastAPI, which is a framework for building web applications.

The main code is in app.py. We declare \"routes\" (ways of interacting with the server) and then assign functions to them.

For example, you can look at the predict route, which is associated with the function of the same name.

    @app.post(\n    \"/predict\",\n    description=\"Send a base64 encoded image + the model name, get detections\",\n    response_description=\"Detections + Processing time\",\n    response_model=Result,\n)\n

This function runs inference on the image passed in the REST request sent to the /predict route.

To better illustrate how you can interact with this server, we will launch it locally, using the docker image that has already been built (Note: you can rebuild the image yourself by running docker build -f Dockerfile -t eu.gcr.io/third-ridge-138414/yolo-v5:1.2 .)

Run the following command: docker run --rm -p 8000:8000 eu.gcr.io/third-ridge-138414/yolo-v5:1.2

This launches a container from the backend docker image, exposing port 8000.

Connect to port 8000 of the codespace. You should see a bare page containing \"YOLO-V5 WebApp created with FastAPI\"

We will now look at the application's documentation. It is automatically generated from the code of app.py and is available on the /docs route

So connect to the /docs route by appending it to the codespace URL.

This web page describes the available routes and how to interact with them, as well as the input and output formats. It is the API documentation, and when you interact with the server it is the only thing you need.

We will now interact with this server.
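Before running the test script, you can also poke at the API by hand; a minimal sketch with curl (it assumes the container is still running locally on port 8000; /health is the liveness route used by the frontend below, and /models is hypothetically available as in the long version of this TP):

curl http://localhost:8000/health\ncurl http://localhost:8000/models\n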

In the backend folder there is a python file test_webapp.py. It automatically sends the right requests to the server. Run it (python test_webapp.py); you should see the tests corresponding to the code, as well as the cat detections on the image cats.png

Leave the terminal with the running container open for now,

    "},{"location":"1_8_deployment_tp.html#4-construction-et-tests-du-frontend","title":"4 - Construction et tests du frontend","text":"

As you may have noticed, interacting with the backend through scripts is not very intuitive; we would like to visualize the predictions more easily, apply thresholds on the objects' confidence, etc...

For that, we will create a streamlit application (note: for an introduction to streamlit, see the streamlit section of the BE)

In your codespace, open a new terminal and go to the frontend folder. Here again, the app.py file contains the code of the streamlit application. It retrieves an image that you upload (any image you like) and sends it to the server whose IP you specify in the field in the top left corner.

We will now launch this application,

    docker run --rm -p 8501:8501 --network=\"host\" eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5

Go to port 8501 of your github codespace,

The first step is to fill in the address (URL) of the backend. To check that you can reach the server, click the \"IS ALIVE\" button. This button (see the code in app.py) sends a request to the /health route to check that the server is alive.

By default, the server URL is http://localhost:8000, which looks right since we started a docker container on port 8000.

You can now test the server and, if it works, upload an image of your choice with the upload button and launch a prediction. This uploads the image to the frontend, sends a POST request to http://url-of-the-server/predict, then retrieves the results (the json) and interprets them correctly.

Note that we started the frontend with the --network=\"host\" argument. It gives the container access to localhost (it is on the same network as the host). Without this argument, the containers are on separate networks and cannot see each other.

You can now stop both containers (backend and frontend)

    "},{"location":"1_8_deployment_tp.html#5-docker-compose","title":"5 - docker-compose","text":"

To simplify this multi-container deployment step, which can get tedious (imagine an application with 4 or 5 containers!), a solution named docker-compose exists. See an introduction to docker-compose

This solution launches a set of containers attached to the same network in a declarative way, i.e. the container setup is described in a configuration file.

Our docker-compose.yml is in the tp-deployment folder

    version: '3'\nservices:\n  yolo:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5:1.2\"\n    ports:\n      - \"8000:8000\"\n    hostname: yolo\n  streamlit:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5\"\n    ports:\n      - \"8501:8501\"\n    hostname: streamlit\n

This configuration file states that, on startup, the frontend and the backend are launched simultaneously, expose their respective ports, and can communicate with each other via their \"hostnames\".

We will launch our application this way by running the command docker-compose up

See the docker-compose docs: https://docs.docker.com/compose/reference/

This directly starts our two services, which you can reach on ports 8000 (backend) and 8501 (frontend)

As before, you can connect to the frontend on port 8501 of the codespace to interact directly with the backend. The small difference is that the backend is now available at http://yolo:8000 rather than http://localhost:8000, because docker-compose named the containers with the hostnames specified in the file (and put them on the same network)

Once you have interacted with your deployment, we will now deploy the backend on a server on google cloud.

    "},{"location":"1_8_deployment_tp.html#6-deploiement-du-backend-sur-une-vm-google-compute-engine","title":"6 - Deploiement du backend sur une VM Google Compute Engine","text":"

We will now start a Google Compute Engine VM instance and deploy a container directly on it. You have already seen this method in the streamlit section of the BE

Don't forget to connect your github codespace to your gcp project using gcloud init

Get your gcp project_id: `PROJECT_ID=$(gcloud config get-value project 2> /dev/null)`

Then we will create a VM and deploy a container on it in one step. Note that this time we use an OS dedicated to hosting containers (not meant to be accessed over ssh) rather than ubuntu as before.

    gcloud compute instances create-with-container fch-yolo-backend \\\n    --project=${PROJECT_ID} \\\n    --zone=europe-west1-b \\\n    --machine-type=n1-standard-2 \\\n    --image=projects/cos-cloud/global/images/cos-stable-109-17800-66-27 \\\n    --boot-disk-size=20GB \\\n    --boot-disk-type=pd-standard \\\n    --container-image=eu.gcr.io/third-ridge-138414/yolo-v5:1.2 \\\n    --container-restart-policy=always\n

Note: if you are using your own GCP project, you must open port 8000 to the public internet to be able to reach it. Use this command:

    gcloud compute --project=${PROJECT_ID} firewall-rules create open-8000 --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8000 --source-ranges=0.0.0.0/0 \n
    "},{"location":"1_8_deployment_tp.html#7-tests","title":"7 - Tests","text":"

We will now check that our backend is properly deployed. To do this, restart the front-end and change the IP to the IP of the virtual machine launched above

• restart the frontend docker container: docker run --rm -p 8501:8501 eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5
• connect to port 8501 of the github codespace, as before, and change the backend IP so that it matches the address of the remote server (still on port 8000)
• if you send a request, it is now forwarded to the backend !
    "},{"location":"1_8_deployment_tp.html#8-yay","title":"8. Yay !","text":"

    Success

\ud83c\udf7e And there you go, you have deployed your first model to the cloud

Don't forget to delete your GCP VM once the work is done
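For instance, a sketch assuming the instance name and zone used in the command above:

gcloud compute instances delete fch-yolo-backend --zone=europe-west1-b\n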

    "},{"location":"1_8_deployment_tp_long.html","title":"Deployment : Deploy your ML model in production (Version Longue de janvier 2023)","text":""},{"location":"1_8_deployment_tp_long.html#objectives","title":"Objectives","text":"

    Your first ML model in production !

    • A model behind a Restful API, packaged in a docker
    • A frontend using streamlit, packaged in a docker
    • Deploy a multi-container application using docker compose
    • Deploy the model in the docker image
    • Send it to your friends !

Let's look at this notebook

It performs the following operations:

• Loading a model
• Loading an image
• Detecting the \"objects\" in the image
• Drawing the detections on the image
• Displaying the result

The goal is to convert this notebook into two applications:

• One that \"serves\" the predictions of a model (the server)
• One that lets a user easily interact with the model by uploading their own image (the \"client\")

We will develop all of this in the development environment (codespaces)

Then deploy the model in the GCP environment

    "},{"location":"1_8_deployment_tp_long.html#team-composition","title":"Team Composition","text":"

It is better to work in pairs so you can help each other :)

    "},{"location":"1_8_deployment_tp_long.html#configuration-du-codespace","title":"Configuration du codespace","text":"

We will use github codespaces as our development environment,

Start again from https://github.com/github/codespaces-blank

Then configure this codespace with the google cloud sdk and set it up for the isae-sdd project

    Hint

# Reminder: install the google cloud sdk\n# https://cloud.google.com/sdk/docs/install#linux\ncurl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-416.0.0-linux-x86_64.tar.gz\ntar -xf google-cloud-cli-416.0.0-linux-x86_64.tar.gz\n./google-cloud-sdk/install.sh\n# Type yes to add to path !\nexport PATH=./google-cloud-sdk/bin:$PATH\ngcloud init\n# login and copy the token\n# configure isae-sdd then compute zone 17\ngcloud auth configure-docker\n

See the previous TPs

Now, from this codespace, open a terminal and retrieve the following files:

    gsutil cp -r gs://fchouteau-isae-cloud/deployment/* .\n

    Hint

If you run out of storage during the TP, run docker system prune to clean the docker cache

    "},{"location":"1_8_deployment_tp_long.html#1-converting-a-prediction-notebook-into-a-webapplication","title":"1 - Converting a prediction notebook into a webapplication","text":"

Go to the newly created model folder

    "},{"location":"1_8_deployment_tp_long.html#objectif","title":"Objectif","text":"

Package a machine learning model behind a web application so that it can be deployed on the web and serve predictions to users

The model: an object detector for \"standard\" photographs, supposed to run in real time, that outputs \"bounding boxes\" around the objects detected in images

Note: the paper is worth reading https://pjreddie.com/media/files/papers/YOLOv3.pdf

We use the version available on torchhub https://pytorch.org/hub/ultralytics_yolov5/ which corresponds to the following repository https://github.com/ultralytics/yolov5

Here is a short explanation of the history of YOLO https://medium.com/towards-artificial-intelligence/yolo-v5-is-here-custom-object-detection-tutorial-with-yolo-v5-12666ee1774e

We propose to wrap 3 versions of the model (S, M, L), which are 3 more or less complex variants of the YOLO-V5 model, in order to compare their performance and results

    "},{"location":"1_8_deployment_tp_long.html#deroulement","title":"D\u00e9roulement","text":"
• Turn a prediction notebook into a \u201cWebApp\u201d by filling in app.stub.py and renaming it to app.py
• Package the application as a docker image
• Test the docker image locally
• Upload the docker image to Google Container Registry
    "},{"location":"1_8_deployment_tp_long.html#developpement-de-apppy","title":"D\u00e9veloppement de app.py","text":"

Let's look at app.stub.py (which we will rename to app.py)

    import base64\nimport io\nimport time\nfrom typing import List, Dict\n\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n\nclass Input(BaseModel):\n    model: str\n    image: str\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n# !!!! FILL ME\ndef parse_predictions(prediction: np.ndarray, classes: [str]) -> List[Detection]:\n    raise NotImplementedError\n\n\n# !!!! FILL ME\ndef load_model(model_name: str):\n    \"\"\"\"\"\"\n    raise NotImplementedError\n\n\nMODEL_NAMES = [\"yolov5s\", \"yolov5m\", \"yolov5l\"]\n\napp = FastAPI(\n    title=\"NAME ME\",\n    description=\"\"\"\n                DESCRIBE ME\n                \"\"\",\n    version=\"1.0\",\n)\n\n# !!!! FILL ME\n# This is a dictionnary that must contains a model for each key (model names), fill load model\n# example: for model_name in MODEL_NAMES: MODELS[model_name] = load_model(model_name)\n# You can also lazily load models only when they are called to avoid holding 3 models in memory\nMODELS = ...\n\n\n@app.get(\"/\", description=\"return the title\", response_description=\"FILL ME\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/health\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\"/models\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=List[str])\ndef models() -> [str]:\n    return MODEL_NAMES\n\n\n@app.post(\"/predict\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=Result)\ndef predict(inputs: Input) -> Result:\n\n    # get correct model\n    model_name = inputs.model\n\n    if model_name not in MODEL_NAMES:\n        raise HTTPException(status_code=400, detail=\"wrong model name, choose between {}\".format(MODEL_NAMES))\n\n    # Get the model from the list of available models\n    model = MODELS.get(model_name)\n\n    # Get & Decode image\n    try:\n        image = inputs.image.encode(\"utf-8\")\n        image = base64.b64decode(image)\n        image = Image.open(io.BytesIO(image))\n    except:\n        raise HTTPException(status_code=400, detail=\"File is not an image\")\n    # Convert from RGBA to RGB *to avoid alpha channels*\n    if image.mode == \"RGBA\":\n        image = image.convert(\"RGB\")\n\n    # Inference\n\n    # RUN THE PREDICTION, TIME IT\n    predictions = ...\n\n    # Post processing\n    classes = predictions.names\n    predictions = predictions.xyxy[0].numpy()\n\n    # Create a list of [DETECTIONS] objects that match the detection class above, using the parse_predictions method\n    detections = ...\n\n    result = Result(detections=..., time=..., model=...)\n\n    return result\n

First, you can fill in the description of the \"routes\" (i.e. the application's functions):

    @app.get(\"/\", description=\"return the title\", response_description=\"FILL ME\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/health\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\"/models\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=List[str])\ndef models() -> [str]:\n    return MODEL_NAMES\n

There are two functions to complete, taking inspiration from the inference.ipynb notebook. Thanks to python type hints, you have the input and output types of both functions

The first one takes an array of rows of the form (left, top, right, bottom, confidence, class_index) and a list of class names, and creates a list of Detection objects (see the code above for how Detection objects are created)

    # !!!! FILL ME\ndef parse_predictions(predictions: np.ndarray, classes: [str]) -> List[Detection]:\n    raise NotImplementedError\n
    Hint
    def parse_prediction(prediction: np.ndarray, classes: [str]) -> Detection:\nx0, y0, x1, y1, cnf, cls = prediction\ndetection = Detection(\n    x_min=int(x0),\n    y_min=int(y0),\n    x_max=int(x1),\n    y_max=int(y1),\n    confidence=round(float(cnf), 3),\n    class_name=classes[int(cls)],\n)\nreturn detection\n

The second function must load a model from torchhub given its name (see the docker image)

    # !!!! FILL ME\ndef load_model(model_name: str):\n    \"\"\"\"\"\"\n    raise NotImplementedError\n
    Hint
    def load_model(model_name: str) -> Dict:\n    # Load model from torch\n    model = torch.hub.load(\"ultralytics/yolov5\", model_name, pretrained=True)\n    # Evaluation mode + Non maximum threshold\n    model = model.eval()\n\nreturn model\n

Then, you can call the model-loading function, for example

    # !!!! FILL ME\n# This is a dictionnary that must contains a model for each key (model names), fill load model\n# example: for model_name in MODEL_NAMES: MODELS[model_name] = load_model(model_name)\n# You can also lazily load models only when they are called to avoid holding 3 models in memory\nMODELS = {}\nfor model_name in MODEL_NAMES:\n    MODELS[model_name] = load_model(model_name)\n

Finally, you need to write the code that runs a prediction from a PIL image and measures the prediction time (hint: import time and t0 = time.time() ...)

    # RUN THE PREDICTION, TIME IT\npredictions = ...\n# Post processing\nclasses = predictions.names\npredictions = predictions.xyxy[0].numpy()\n

The result of predictions is a numpy array with the columns left, top, right, bottom, confidence, class_index

You then need to turn these predictions into [Detection]

    class Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n
    # Create a list of [DETECTIONS] objects that match the detection class above, using the parse_predictions method\ndetections = parse_predictions(predictions, classes)\n
    Hint
    # Inference\nt0 = time.time()\npredictions = model(image, size=640)  # includes NMS\nt1 = time.time()\nclasses = predictions.names\n\n# Post processing\npredictions = predictions.xyxy[0].numpy()\ndetections = [parse_prediction(prediction=pred, classes=classes) for pred in predictions]\n\nresult = Result(detections=detections, time=round(t1 - t0, 3), model=model_name)\n
    "},{"location":"1_8_deployment_tp_long.html#correction","title":"Correction","text":"

    app.py

    Hint
    import base64\nimport io\nimport time\nfrom typing import List, Dict\n\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n\nclass Input(BaseModel):\n    model: str\n    image: str\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\ndef parse_prediction(prediction: np.ndarray, classes: [str]) -> Detection:\n    x0, y0, x1, y1, cnf, cls = prediction\n    detection = Detection(\n        x_min=int(x0),\n        y_min=int(y0),\n        x_max=int(x1),\n        y_max=int(y1),\n        confidence=round(float(cnf), 3),\n        class_name=classes[int(cls)],\n    )\n    return detection\n\n\ndef load_model(model_name: str) -> Dict:\n    # Load model from torch\n    model = torch.hub.load(\"ultralytics/yolov5\", model_name, pretrained=True)\n    # Evaluation mode + Non maximum threshold\n    model = model.eval()\n\n    return model\n\n\n# %%\napp = FastAPI(\n    title=\"YOLO-V5 WebApp created with FastAPI\",\n    description=\"\"\"\n                Wraps 3 different yolo-v5 models under the same RESTful API\n                \"\"\",\n    version=\"1.1\",\n)\n\n# %%\nMODEL_NAMES = [\"yolov5s\", \"yolov5m\", \"yolov5l\"]\nMODELS = {}\n\n\n@app.get(\"/\", description=\"return the title\", response_description=\"title\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"return the description\", response_description=\"description\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/version\", description=\"return the version\", response_description=\"version\", response_model=str)\ndef describe() -> str:\n    return app.version\n\n\n@app.get(\"/health\", description=\"return whether it's alive\", response_description=\"alive\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\n    \"/models\",\n    description=\"Query the list of models\",\n    response_description=\"A list of available models\",\n    response_model=List[str],\n)\ndef models() -> [str]:\n    return MODEL_NAMES\n\n\n@app.post(\n    \"/predict\",\n    description=\"Send a base64 encoded image + the model name, get detections\",\n    response_description=\"Detections + Processing time\",\n    response_model=Result,\n)\ndef predict(inputs: Input) -> Result:\n    global MODELS\n\n    # get correct model\n    model_name = inputs.model\n\n    if model_name not in MODEL_NAMES:\n        raise HTTPException(status_code=400, detail=\"wrong model name, choose between {}\".format(MODEL_NAMES))\n\n    # check load\n    if MODELS.get(model_name) is None:\n        MODELS[model_name] = load_model(model_name)\n\n    model = MODELS.get(model_name)\n\n    # Get Image\n    # Decode image\n    try:\n        image = inputs.image.encode(\"utf-8\")\n        image = base64.b64decode(image)\n        image = Image.open(io.BytesIO(image))\n    except:\n        raise HTTPException(status_code=400, detail=\"File is not an image\")\n    # Convert from RGBA to RGB *to avoid alpha channels*\n    if image.mode == \"RGBA\":\n        image = image.convert(\"RGB\")\n\n    # Inference\n    t0 = time.time()\n    predictions = model(image, size=640)  # includes NMS\n    t1 = time.time()\n    classes = predictions.names\n\n    # Post processing\n    predictions = 
predictions.xyxy[0].numpy()\n    detections = [parse_prediction(prediction=pred, classes=classes) for pred in predictions]\n\n    result = Result(detections=detections, time=round(t1 - t0, 3), model=model_name)\n\n    return result\n
    "},{"location":"1_8_deployment_tp_long.html#construire-le-docker","title":"Construire le docker","text":"
PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker build -t eu.gcr.io/${PROJECT_ID}/{your-name}-{your app name}:{your version} -f Dockerfile . \n
    "},{"location":"1_8_deployment_tp_long.html#tester-le-docker","title":"Tester le docker","text":"

You can launch the docker container locally and test it with the notebook

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker run --rm -p 8000:8000 eu.gcr.io/${PROJECT_ID}/{your-name}-{your app name}:{your version}\n

You can connect to your app via its public IP on port 8000 from your local browser

    http://{ip}:8000

Try a few routes:

    /models /docs

    "},{"location":"1_8_deployment_tp_long.html#pusher-le-docker-sur-google-container-registry","title":"Pusher le docker sur google container registry","text":"
    gcloud auth configure-docker\ndocker push eu.gcr.io/${PROJECT_ID}/{your-name}-model:{your version}\n

If you need to update the docker image, you must bump the version for the deployment

    "},{"location":"1_8_deployment_tp_long.html#liens-utiles","title":"Liens Utiles","text":"
    • https://fastapi.tiangolo.com/
    • https://requests.readthedocs.io/en/master/
    • https://testdriven.io/blog/fastapi-streamlit/
    "},{"location":"1_8_deployment_tp_long.html#2-making-a-companion-application","title":"2 - Making a companion application","text":"

Go to the streamlit folder

    "},{"location":"1_8_deployment_tp_long.html#objectif_1","title":"Objectif","text":"

Create a \"companion\" application that lets you send requests to a model in an ergonomic way and visualize the results

    "},{"location":"1_8_deployment_tp_long.html#deroulement_1","title":"D\u00e9roulement","text":"
• Fill in app.stub.py, rename it to app.py, fill in the right fields (use the notebooks in app/ for help) and create nice visualizations
• Package the application as a docker image
• Test the docker image locally
• Upload the docker image to Google Container Registry
    "},{"location":"1_8_deployment_tp_long.html#guide-de-developpement","title":"Guide de d\u00e9veloppement","text":"

Let's look at APP.md

• Fill the file with the description of your application

Let's look at app.stub.py

    import requests\nimport streamlit as st\nfrom PIL import Image\nimport io\nimport base64\nfrom pydantic import BaseModel\nfrom typing import List\nimport random\n\n# ---- Functions ---\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n@st.cache(show_spinner=True)\ndef make_dummy_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    This simulates a fake answer for you to test your application without having access to any other input from other teams\n    \"\"\"\n    # We do a dummy encode and decode pass to check that the file is correct\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n    # We do a dummy decode\n    _image = data.get(\"image\")\n    _image = _image.encode(\"utf-8\")\n    _image = base64.b64decode(_image)\n    _image = Image.open(io.BytesIO(_image))  # type: Image\n    if _image.mode == \"RGBA\":\n        _image = _image.convert(\"RGB\")\n\n    _model = data.get(\"model\")\n\n    # We generate a random prediction\n    w, h = _image.size\n\n    detections = [\n        Detection(\n            x_min=random.randint(0, w // 2 - 1),\n            y_min=random.randint(0, h // 2 - 1),\n            x_max=random.randint(w // w, w - 1),\n            y_max=random.randint(h // 2, h - 1),\n            class_name=\"dummy\",\n            confidence=round(random.random(), 3),\n        )\n        for _ in range(random.randint(1, 10))\n    ]\n\n    # We return the result\n    result = Result(time=0.1, model=_model, detections=detections)\n\n    return result\n\n\n@st.cache(show_spinner=True)\ndef make_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    Process our data and send a proper request\n    \"\"\"\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n        response = requests.post(\"{}/predict\".format(model_url), json=data)\n\n    if not response.status_code == 200:\n        raise ValueError(\"Error in processing payload, {}\".format(response.text))\n\n    response = response.json()\n\n    return Result.parse_obj(response)\n\n\n# ---- Streamlit App ---\n\nst.title(\"NAME ME BECAUSE I AM AWESOME\")\n\nwith open(\"APP.md\") as f:\n    st.markdown(f.read())\n\n# --- Sidebar ---\n# defines an h1 header\n\nmodel_url = st.sidebar.text_input(label=\"Cluster URL\", value=\"http://localhost:8000\")\n\n_model_url = model_url.strip(\"/\")\n\nif st.sidebar.button(\"Send 'is alive' to IP\"):\n    try:\n        response = requests.get(\"{}/health\".format(_model_url))\n        if response.status_code == 200:\n            st.sidebar.success(\"Webapp responding at {}\".format(_model_url))\n        else:\n            st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n    except ConnectionError:\n        st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n\ntest_mode_on = st.sidebar.checkbox(label=\"Test Mode - Generate dummy answer\", value=False)\n\n# --- Main window\n\nst.markdown(\"## Inputs\")\nst.markdown(\"Describe something... 
You can also add things like confidence slider etc...\")\n\n# Here we should be able to choose between [\"yolov5s\", \"yolov5m\", \"yolov5l\"], perhaps a radio button with the three choices ?\nmodel_name = ...\n\n# Here we should be able to upload a file (our image)\nimage_file = ...\n\n# Converting image, this is done for you :)\nif image_file is not None:\n    image_file.seek(0)\n    image = image_file.read()\n    image = Image.open(io.BytesIO(image))\n\nif st.button(label=\"SEND PAYLOAD\"):\n\n    if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = ...  # call the proper function\n    else:\n        result = ...  # call the proper function\n\n    st.balloons()\n\n    st.markdown(\"## Display\")\n\n    st.markdown(\"Make something pretty, draw polygons and confidence..., here's an ugly output\")\n\n    st.image(image, width=512, caption=\"Uploaded Image\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    for detection in result.detections:\n        st.json(detection.json())\n

Most of the request functions are already implemented; what remains is the user-input widgets and the visualization

• Input: use st.radio and st.file_uploader:

    https://docs.streamlit.io/en/stable/getting_started.html

    https://docs.streamlit.io/en/stable/api.html#streamlit.radio

    https://docs.streamlit.io/en/stable/api.html#streamlit.file_uploader

    st.markdown(\"## Inputs\")\nst.markdown(\"Select your model (Small, Medium or Large)\")\n\nmodel_name = st.radio(label=\"Model Name\", options=[\"yolov5s\", \"yolov5m\", \"yolov5l\"])\n\nst.markdown(\"Upload an image\")\n\nimage_file = st.file_uploader(label=\"Image File\", type=[\"png\", \"jpg\", \"tif\"])\n
    • Visualizations

    Example code that mimics the prediction notebook to draw the detections on a PIL image

    def draw_preds(image: Image, detections: [Detection]):\n\n    class_names = list(set([detection.class_name for detection in detections]))\n\n    image_with_preds = image.copy()\n\n    # Define colors\n    colors = plt.cm.get_cmap(\"viridis\", len(class_names)).colors\n    colors = (colors[:, :3] * 255.0).astype(np.uint8)\n\n    # Define font\n    font = list(Path(\"/usr/share/fonts\").glob(\"**/*.ttf\"))[0].name\n    font = ImageFont.truetype(font=font, size=np.floor(3e-2 * image_with_preds.size[1] + 0.5).astype(\"int32\"))\n    thickness = (image_with_preds.size[0] + image_with_preds.size[1]) // 300\n\n    # Draw detections\n    for detection in detections:\n        left, top, right, bottom = detection.x_min, detection.y_min, detection.x_max, detection.y_max\n        score = float(detection.confidence)\n        predicted_class = detection.class_name\n        class_idx = class_names.index(predicted_class)\n\n        label = \"{} {:.2f}\".format(predicted_class, score)\n\n        draw = ImageDraw.Draw(image_with_preds)\n        label_size = draw.textsize(label, font)\n\n        top = max(0, np.floor(top + 0.5).astype(\"int32\"))\n        left = max(0, np.floor(left + 0.5).astype(\"int32\"))\n        bottom = min(image_with_preds.size[1], np.floor(bottom + 0.5).astype(\"int32\"))\n        right = min(image_with_preds.size[0], np.floor(right + 0.5).astype(\"int32\"))\n\n        if top - label_size[1] >= 0:\n            text_origin = np.array([left, top - label_size[1]])\n        else:\n            text_origin = np.array([left, top + 1])\n\n        # My kingdom for a good redistributable image drawing library.\n        for r in range(thickness):\n            draw.rectangle([left + r, top + r, right - r, bottom - r], outline=tuple(colors[class_idx]))\n        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=tuple(colors[class_idx]))\n\n        if any(colors[class_idx] > 128):\n            fill = (0, 0, 0)\n        else:\n            fill = (255, 255, 255)\n\n        draw.text(text_origin, label, fill=fill, font=font)\n\n        del draw\n\n    return image_with_preds\n

    Usage (example)

        if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = ...  # call the proper function\n    else:\n        result = ...  # call the proper function\n\n    st.balloons()\n\n    st.markdown(\"## Display\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    image_with_preds = draw_preds(image, result.detections)\n    st.image(image_with_preds, width=1024, caption=\"Image with detections\")\n\n    st.markdown(\"### Detection dump\")\n    for detection in result.detections:\n        st.json(detection.json())\n
    "},{"location":"1_8_deployment_tp_long.html#corection-apppy","title":"Corection app.py","text":"Hint
    import base64\nimport io\nimport random\nfrom pathlib import Path\nfrom typing import List\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport requests\nimport streamlit as st\nfrom PIL import Image\nfrom PIL import ImageDraw, ImageFont\nfrom pydantic import BaseModel\n\n# ---- Functions ---\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n@st.cache(show_spinner=True)\ndef make_dummy_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    This simulates a fake answer for you to test your application without having access to any other input from other teams\n    \"\"\"\n    # We do a dummy encode and decode pass to check that the file is correct\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n    # We do a dummy decode\n    _image = data.get(\"image\")\n    _image = _image.encode(\"utf-8\")\n    _image = base64.b64decode(_image)\n    _image = Image.open(io.BytesIO(_image))  # type: Image\n    if _image.mode == \"RGBA\":\n        _image = _image.convert(\"RGB\")\n\n    _model = data.get(\"model\")\n\n    # We generate a random prediction\n    w, h = _image.size\n\n    detections = [\n        Detection(\n            x_min=random.randint(0, w // 2 - 1),\n            y_min=random.randint(0, h // 2 - 1),\n            x_max=random.randint(w // w, w - 1),\n            y_max=random.randint(h // 2, h - 1),\n            class_name=\"dummy\",\n            confidence=round(random.random(), 3),\n        )\n        for _ in range(random.randint(1, 10))\n    ]\n\n    # We return the result\n    result = Result(time=0.1, model=_model, detections=detections)\n\n    return result\n\n\n@st.cache(show_spinner=True)\ndef make_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    Process our data and send a proper request\n    \"\"\"\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n        response = requests.post(\"{}/predict\".format(model_url), json=data)\n\n    if not response.status_code == 200:\n        raise ValueError(\"Error in processing payload, {}\".format(response.text))\n\n    response = response.json()\n\n    return Result.parse_obj(response)\n\n\ndef draw_preds(image: Image, detections: [Detection]):\n\n    class_names = list(set([detection.class_name for detection in detections]))\n\n    image_with_preds = image.copy()\n\n    # Define colors\n    colors = plt.cm.get_cmap(\"viridis\", len(class_names)).colors\n    colors = (colors[:, :3] * 255.0).astype(np.uint8)\n\n    # Define font\n    font = list(Path(\"/usr/share/fonts\").glob(\"**/*.ttf\"))[0].name\n    font = ImageFont.truetype(font=font, size=np.floor(3e-2 * image_with_preds.size[1] + 0.5).astype(\"int32\"))\n    thickness = (image_with_preds.size[0] + image_with_preds.size[1]) // 300\n\n    # Draw detections\n    for detection in detections:\n        left, top, right, bottom = detection.x_min, detection.y_min, detection.x_max, detection.y_max\n        score = float(detection.confidence)\n        predicted_class = detection.class_name\n        class_idx = 
class_names.index(predicted_class)\n\n        label = \"{} {:.2f}\".format(predicted_class, score)\n\n        draw = ImageDraw.Draw(image_with_preds)\n        label_size = draw.textsize(label, font)\n\n        top = max(0, np.floor(top + 0.5).astype(\"int32\"))\n        left = max(0, np.floor(left + 0.5).astype(\"int32\"))\n        bottom = min(image_with_preds.size[1], np.floor(bottom + 0.5).astype(\"int32\"))\n        right = min(image_with_preds.size[0], np.floor(right + 0.5).astype(\"int32\"))\n\n        if top - label_size[1] >= 0:\n            text_origin = np.array([left, top - label_size[1]])\n        else:\n            text_origin = np.array([left, top + 1])\n\n        # My kingdom for a good redistributable image drawing library.\n        for r in range(thickness):\n            draw.rectangle([left + r, top + r, right - r, bottom - r], outline=tuple(colors[class_idx]))\n        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=tuple(colors[class_idx]))\n\n        if any(colors[class_idx] > 128):\n            fill = (0, 0, 0)\n        else:\n            fill = (255, 255, 255)\n\n        draw.text(text_origin, label, fill=fill, font=font)\n\n        del draw\n\n    return image_with_preds\n\n\n# ---- Streamlit App ---\n\nst.title(\"Yolo v5 Companion App\")\n\nst.markdown(\n    \"A super nice companion application to send requests and parse results\\n\"\n    \"We wrap https://pytorch.org/hub/ultralytics_yolov5/\"\n)\n\n# ---- Sidebar ----\n\ntest_mode_on = st.sidebar.checkbox(label=\"Test Mode - Generate dummy answer\", value=False)\n\nst.sidebar.markdown(\"Enter the cluster URL\")\nmodel_url = st.sidebar.text_input(label=\"Cluster URL\", value=\"http://localhost:8000\")\n\n_model_url = model_url.strip(\"/\")\n\nif st.sidebar.button(\"Send 'is alive' to IP\"):\n    try:\n        health = requests.get(\"{}/health\".format(_model_url))\n        title = requests.get(\"{}/\".format(_model_url))\n        version = requests.get(\"{}/version\".format(_model_url))\n        describe = requests.get(\"{}/describe\".format(_model_url))\n\n        if health.status_code == 200:\n            st.sidebar.success(\"Webapp responding at {}\".format(_model_url))\n            st.sidebar.json({\"title\": title.text, \"version\": version.text, \"description\": describe.text})\n        else:\n            st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n    except ConnectionError:\n        st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n\n\n# ---- Main window ----\n\nst.markdown(\"## Inputs\")\nst.markdown(\"Select your model (Small, Medium or Large)\")\n\n# Data input\nmodel_name = st.radio(label=\"Model Name\", options=[\"yolov5s\", \"yolov5m\", \"yolov5l\"])\n\nst.markdown(\"Upload an image\")\n\nimage_file = st.file_uploader(label=\"Image File\", type=[\"png\", \"jpg\", \"tif\"])\n\nconfidence_threshold = st.slider(label=\"Confidence filter\", min_value=0.0, max_value=1.0, value=0.0, step=0.05)\n\n# UploadFile to PIL Image\nif image_file is not None:\n    image_file.seek(0)\n    image = image_file.read()\n    image = Image.open(io.BytesIO(image))\n\nst.markdown(\"Send the payload to {}/predict\".format(_model_url))\n\n# Send payload\nif st.button(label=\"SEND PAYLOAD\"):\n    if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = make_dummy_request(model_url=_model_url, model=model_name, image=image)\n    else:\n        result = make_request(model_url=_model_url, 
model=model_name, image=image)\n\n    st.balloons()\n\n    # Display results\n    st.markdown(\"## Display\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    detections = [detection for detection in result.detections if detection.confidence > confidence_threshold]\n\n    image_with_preds = draw_preds(image, detections)\n    st.image(image_with_preds, width=1024, caption=\"Image with detections\")\n\n    st.markdown(\"### Detection dump\")\n    for detection in result.detections:\n        st.json(detection.json())\n

    Note

    The test mode was used for an older BE. If you have done everything in order, you should not need it

    "},{"location":"1_8_deployment_tp_long.html#construire-le-docker_1","title":"Construire le docker","text":"
    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker build -t eu.gcr.io/${PROJECT_ID}/{your app name}:{your version} -f Dockerfile .\n
    "},{"location":"1_8_deployment_tp_long.html#tester-le-docker_1","title":"Tester le docker","text":"

    Warning

    Unfortunately, this does not seem to work on GitHub Codespaces. We will have to assume that it works on the first try! The best approach is therefore to make sure that your app.py matches the solution, then move on to the next section

    Instead of running streamlit run app.py, you can launch the Docker container locally and go to {ip}:8501 to test it

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker run --rm -p 8501:8501 eu.gcr.io/${PROJECT_ID}/{your app name}:{your version}\n

    You can then go to the machine's IP on port 8501

    Enter the machine's IP with port 8000 in the sidebar on the left

    "},{"location":"1_8_deployment_tp_long.html#pousser-le-docker-sur-google-container-registry","title":"Pousser le docker sur google container registry","text":"
    gcloud auth configure-docker\ndocker push eu.gcr.io/${PROJECT_ID}/{your-name}-frontend:{your version}\n
    "},{"location":"1_8_deployment_tp_long.html#liens-utiles_1","title":"Liens Utiles","text":"
    • Streamlit documentation
    "},{"location":"1_8_deployment_tp_long.html#4-deployer-le-modele-et-lux-sur-linstance-gcp","title":"4 - D\u00e9ployer le mod\u00e8le et l'UX sur l'instance GCP","text":"

    We will create a virtual machine in which we will launch the two containers

    "},{"location":"1_8_deployment_tp_long.html#41-creation-de-la-vm","title":"4.1 Cr\u00e9ation de la VM","text":"

    We will directly create a machine with the model container already launched

    Let's start by creating a properly configured GCP instance from which to connect:

    Don't forget to change the name of your instance

    export INSTANCE_NAME=\"tp-deployment-{yourgroup}-{yourname}\" # Don't forget to replace values !\n
    gcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --machine-type=\"n1-standard-2\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=TERMINATE \\\n        --scopes=\"storage-rw\" \\\n        --boot-disk-size=75GB\n

    Retrieve the public IP of the machine (via the Google Cloud console, or by running gcloud compute instances list | grep {your instance}) and write it down

    From your GitHub Codespace, connect to the machine

        gcloud compute ssh {user}@{instance}\n
    "},{"location":"1_8_deployment_tp_long.html#42-execution-des-containers","title":"4.2 Execution des containers","text":"

    Hint

    To be run inside the GCP VM

    We will use docker compose to launch the two applications simultaneously so that they can communicate with each other

    More info on docker compose

    • Stop all running containers, etc.
    • Create a docker-compose.yml file

    On your Codespace, create this file and change the image names to the ones you used (model and frontend respectively)

    version: '3'\nservices:\n  yolo:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5:1.2\"\n    ports:\n      - \"8000:8000\"\n    hostname: yolo\n  streamlit:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.2\"\n    ports:\n      - \"8501:8501\"\n    hostname: streamlit\n

    Then copy this text onto the VM into a file named docker-compose.yml (for example using nano)

    Note that we declare 2 services: a \"yolo\" service and a \"streamlit\" service

    We also declare the exposed ports of each application

    Now... how do we launch the two applications?

    Run docker-compose up in the directory containing your docker-compose.yml

    Hint

    If docker-compose does not work, run sudo apt -y install docker-compose

    Normally: the model service is reachable on port 8000 of the machine, the streamlit service is reachable on port 8501 of the machine, and you must use the hostname \"yolo\" for streamlit to communicate with the model. Indeed, the services can reach each other through a dedicated \"local\" network shared by all containers launched via docker-compose
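
    To quickly check this from inside the streamlit container, here is a minimal sketch (not part of the original TP) that reuses the /health endpoint defined earlier:

    import requests\n\n# Inside the \"streamlit\" container, the model container is reachable via its\n# docker-compose hostname \"yolo\", not via localhost or the VM's public IP\nresponse = requests.get(\"http://yolo:8000/health\")\nprint(response.status_code)  # expect 200 if the model service is up\n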

    "},{"location":"1_8_deployment_tp_long.html#acces-a-la-vm","title":"Acc\u00e8s \u00e0 la VM","text":"

    Hint

    This is likely to work only over 4G

    Connect to the public IP of the machine from your web browser, on port 8501: http://ip-de-la-machine:8501

    You should be able to access your deployment!

    "},{"location":"1_8_deployment_tp_long.html#conclusion","title":"Conclusion","text":"

    🎉 Congratulations! 🎉

    You have deployed your first model to production!

    "},{"location":"2_1_overview.html","title":"Introduction to Data Distribution","text":""},{"location":"2_1_overview.html#course-overview","title":"Course Overview","text":"
    • Data Distribution & Big Data Processing

    Harnessing the complexity of large amounts of data is a challenge in itself.

    But Big Data processing is more than that: originally characterized by the 3 Vs (Volume, Velocity, and Variety), the concepts popularized by Hadoop and Google require dedicated computing solutions (both software and infrastructure), which will be explored in this module.

    "},{"location":"2_1_overview.html#objectives","title":"Objectives","text":"

    By the end of this module, participants will be able to:

    • Understand the differences between, and uses of, the main distributed computing architectures (HPC, Big Data, Cloud, CPU vs GPGPU)
    • Implement the distribution of simple operations via the Map/Reduce principle in PySpark
    • Understand the principle of Kubernetes
    • Deploy a Big Data Processing Platform on the Cloud
    • Implement the distribution of data wrangling/cleaning and machine learning training using the PyData stack, Jupyter notebooks and Dask
    "},{"location":"2_2_orchestration.html","title":"Intro to Orchestration and Kubernetes","text":""},{"location":"2_2_orchestration.html#intro-to-orchestration","title":"Intro to Orchestration","text":"

    Link to slides

    "},{"location":"2_2_orchestration.html#intro-to-kubernetes","title":"Intro to Kubernetes","text":"

    Link to slides

    "},{"location":"2_3_kub_handson.html","title":"Kubernetes: Zero to Jupyterhub using Google Kubernetes Engine","text":""},{"location":"2_3_kub_handson.html#what-is-jupyterhub","title":"What is JupyterHub","text":"

    JupyterHub brings the power of notebooks to groups of users. It gives users access to computational environments and resources without burdening the users with installation and maintenance tasks. Users - including students, researchers, and data scientists - can get their work done in their own workspaces on shared resources which can be managed efficiently by system administrators.

    JupyterHub runs in the cloud or on your own hardware, and makes it possible to serve a pre-configured data science environment to any user in the world. It is customizable and scalable, and is suitable for small and large teams, academic courses, and large-scale infrastructure. Key features of JupyterHub:

    • Customizable - JupyterHub can be used to serve a variety of environments. It supports dozens of kernels with the Jupyter server, and can be used to serve a variety of user interfaces including the Jupyter Notebook, Jupyter Lab, RStudio, nteract, and more.

    • Flexible - JupyterHub can be configured with authentication in order to provide access to a subset of users. Authentication is pluggable, supporting a number of authentication protocols (such as OAuth and GitHub).

    • Scalable - JupyterHub is container-friendly, and can be deployed with modern-day container technology. It also runs on Kubernetes, and can run with up to tens of thousands of users.

    • Portable - JupyterHub is entirely open-source and designed to be run on a variety of infrastructure. This includes commercial cloud providers, virtual machines, or even your own laptop hardware.

    The foundational JupyterHub code and technology can be found in the JupyterHub repository. This repository and the JupyterHub documentation contain more information about the internals of JupyterHub, its customization, and its configuration.

    "},{"location":"2_3_kub_handson.html#zero-to-jupyterhub-using-kubernetes","title":"Zero to Jupyterhub using Kubernetes","text":"

    JupyterHub allows users to interact with a computing environment through a webpage. As most devices have access to a web browser, JupyterHub makes it easy to provide and standardize the computing environment of a group of people (e.g., for a class of students or an analytics team).

    This project will help you set up your own JupyterHub on a cloud and leverage the cloud's scalable nature to support large groups of users. Thanks to Kubernetes, we are not tied to a specific cloud provider.

    "},{"location":"2_3_kub_handson.html#instructions","title":"Instructions","text":"
    • Go here and follow the instructions

    • Use Google Kubernetes Engine to setup your cluster

    Info

    You will use the same method later in the year to set up a Dask Kubernetes cluster using helm

    • Give some people the public IP of your cluster so that they can connect to it... try to make it scale!
    "},{"location":"2_4_functional.html","title":"Functional Programming","text":"

    This section of the course is not given this year.

    "},{"location":"2_4_functional.html#functional-programming-for-distributed-data","title":"Functional Programming for Distributed Data","text":"

    Link to slides

    "},{"location":"2_4_functional.html#introduction-to-julia","title":"Introduction to Julia","text":"

    As the first exercise, you'll need to install Julia and IJulia locally or make a working Julia Colab Notebook. While Colab is sufficient for today's exercises, it is recommended to make a local installation:

    • Julia download
    • Julia kernel for Jupyter

    Here is a Colab template from this Github repository which will install the Julia kernel for a single Colab instance.

    Once you have a Julia Jupyter kernel, follow this Julia for Pythonistas notebook.

    Github Colab

    "},{"location":"2_4_functional.html#functional-programming-in-julia","title":"Functional Programming in Julia","text":"

    Julia documentation explaining:

    • Functions, showing that they are first-class
    • the map function which is a higher-order function
    • distributed computing allowing for transfer of functions between threads or workers
    "},{"location":"2_4_functional.html#distributed-data-in-julia","title":"Distributed Data in Julia","text":"

    Julia's base language supports distributed calculation but there are a few packages which facilitate data processing tasks over distributed data:

    • DistributedArrays - A general Array type which can be distributed over multiple workers.
    • JuliaDB - A data structuring package which automatically handles distributed data storage and computation
    • Spark.jl - A Julia interface to Apache Spark. Related blog post.
    "},{"location":"2_4_functional.html#map-reduce-exercise","title":"Map Reduce Exercise","text":"

    The second part of this class is an interactive notebook in the Julia language covering the MapReduce programming framework, from simple addition queries to a grep example.

    MapReduce notebook

    MapReduce notebook on Colab (requires adding Julia kernel installation)

    "},{"location":"2_5_mapreduce.html","title":"Hadoop and MapReduce","text":"

    In this class, we start with an overview of the Big Data ecosystem, contextualizing Hadoop, No-SQL Databases, and Business Intelligence tools. We then cover Hadoop and the HDFS in detail with a simple MapReduce example.

    Slides

    • Introduction to Big Data and its ecosystem (1h)
    • What is Big Data?
    • Legacy \u201cBig Data\u201d ecosystem
    • Big Data use cases
    • Big Data to Machine Learning
    • Big Data platforms, Hadoop & Beyond (2h)
    • Hadoop, HDFS and MapReduce,
    • Datalakes, Data Pipelines
    • From HPC to Big Data to Cloud and High Performance Data Analytics
    • BI vs Big Data
    • Hadoop legacy: Spark, Dask, Object Storage ...

    It also contains a short interactive exercise using Python Map Reduce.
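
    For illustration, here is a minimal sketch of the Map Reduce pattern in plain Python (the input lines are hypothetical, not the exercise data):

    from functools import reduce\n\nlines = [\"the quick brown fox\", \"the lazy dog\", \"the end\"]  # hypothetical input\n\n# Map: emit (word, 1) pairs\nmapped = [(word, 1) for line in lines for word in line.split()]\n\n# Shuffle: group counts by word\ngroups = {}\nfor word, count in mapped:\n    groups.setdefault(word, []).append(count)\n\n# Reduce: sum the counts for each word\ncounts = {word: reduce(lambda a, b: a + b, values) for word, values in groups.items()}\nprint(counts)  # {'the': 3, 'quick': 1, ...}\n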

    "},{"location":"2_6_spark.html","title":"Spark","text":"

    In this class, we cover the Apache Spark framework, explaining Resilient Distributed Datasets, SparkSQL, Spark MLLib, and how to interact with a Spark cluster. We use PySpark in a Jupyter notebook to explore RDDs and see an example of distributed K-Means.
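    As a taste of the RDD API before the notebook, here is a minimal sketch (assuming a local pyspark installation, not the class cluster):

    from pyspark.sql import SparkSession\n\n# Local session; on a real cluster the master would point elsewhere\nspark = SparkSession.builder.master(\"local[*]\").appName(\"rdd-demo\").getOrCreate()\nsc = spark.sparkContext\n\n# A toy RDD and the classic map/reduce word count\nrdd = sc.parallelize([\"big data\", \"big compute\", \"data engineering\"])\ncounts = (rdd.flatMap(lambda line: line.split())\n             .map(lambda word: (word, 1))\n             .reduceByKey(lambda a, b: a + b))\nprint(counts.collect())\n\nspark.stop()\n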

    Spark introduction

    Spark notebook

    Spark notebook on Colab

    "},{"location":"2_7_cloud.html","title":"Evolution of Data Management Systems","text":""},{"location":"2_7_cloud.html#fundamental-concepts-methods-and-applications","title":"Fundamental Concepts, Methods and Applications","text":"

    In this three part class, students will cover the history of data management systems, from file systems to databases to distributed cloud storage. This class is given over the length of the Data Engineering course. Questions from the first two parts are integrated into the exam on cloud computing, and questions from the Cloud DMS section are integrated into the Dask notebook evaluation.

    "},{"location":"2_7_cloud.html#objectives","title":"Objectives","text":"

    The objectives of this course are to:

    • Introduce the fundamental concepts
    • Describe, concisely, the main characteristics of the evolution of DMS (Data Management Systems)
    • Highlight the targeted application classes.

    "},{"location":"2_7_cloud.html#key-words","title":"Key Words","text":"

    Data Management Systems, Uni-processor DBMS, Parallel DBMS, Data Integration Systems, Big Data, Cloud Data Management Systems, High Performance, Scalability, Elasticity, Multi-store/Poly-store Systems

    "},{"location":"2_7_cloud.html#targeted-skills","title":"Targeted Skills","text":"
    • Effectively exploit a DMS according to the environment (uniprocessor, parallel, distributed, cloud) with a view to supporting decision-making within an organization.
    • Be able to choose an appropriate DMS across multiple environments so that an organization's applications function optimally
    "},{"location":"2_7_cloud.html#indicative-program","title":"Indicative Program","text":"
    1. Introduction to Main Problems of Data Management

      • From File Management Systems FMS to Database MS DBMS
      • Motivations, Objectives, Organizations & Drawbacks
      • Databases & Rel. DBMS: Motivations & Objectives
      • Resources:
        • Introduction
        • SGF - File Systems
        • Views - Relational Systems
        • File Organization
    2. Parallel Database Systems

      • Objectives and Parallel Architecture Models
      • Data Partitioning Strategies
      • Parallel Query Processing
      • Resources:
        • Parallel DBMS
        • Parallel Queries
        • Systems DB Parallel
    3. From Distributed DB to Data Integration Systems DIS

      • An Ex. of DDB, Motivations & Objectives
      • Designing of DDB
      • Distributed Query Processing
      • An Ex. of DIS
      • Motivations & Objectives
      • Mediator-Adapters Architecture
      • Design of a Global Schema (GAV, LAV)
      • Query Processing Methodologies
      • Resources:
        • Distributed DBMS - Chapter 1
        • Distributed DBMS - Chapter 2
        • Distributed DBMS - Chapter 3
        • Systems for integrating heterogeneous and distributed data
        • Integration Systems complement
        • Distributed DBMS Dec 2023
    4. Cloud Data Management Systems CDMS

      • Motivations and Objectives
      • Main Characteristics of Big Data and CDMS
      • Classification of Cloud Data Management Systems CDMS
      • Advantages and Weakness of Parallel RDBMS and CDMS
      • Comparison between Parallel RDBMS and CDMS
      • Introduction to Multi-store/Polystore Systems
      • Resources:
        • Cloud Systems
        • MapReduce examples
    5. Conclusion

      • Maturity of Cloud DMS
      • Key Criteria for Choosing a Data Management System
    "},{"location":"2_7_cloud.html#additional-reading","title":"Additional Reading","text":"
    1. Principles of Distributed Database Systems, M. Tamer Ozsu and Patrick Valduriez; Springer-Verlag ; Fourth Edition, December 2019.

    2. Data Management in the Cloud: Challenges and Opportunities Divyakant Agrawal, Sudipto Das, and Amr El Abbadi; Synthesis Lectures on Data Management, December 2012, Vol. 4, No. 6 , Pages 1-138.

    3. Query Processing in Parallel Relational Database Systems; H. Lu, B.-C Ooi and K.-L. Tan; IEEE Computer Society Press, CA, USA, 1994.

    4. Traitement parall\u00e8le dans les bases de donn\u00e9es relationnelles : concepts, m\u00e9thodes et applications Abdelkader Hameurlain, Pierre Bazex, Franck Morvan; C\u00e9padu\u00e8s Editions, Octobre 1996.

    "},{"location":"2_8_dask.html","title":"Dask on Kubernetes","text":"

    In this class, we focus on getting a Dask cluster running in Kubernetes, which we will then use in the Dask project. Dask is a parallel computing library in Python which integrates well with machine learning tools like scikit-learn.
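
    As a taste of the programming model, here is a minimal local sketch using dask.delayed (independent of the Kubernetes deployment below):

    import dask\n\n@dask.delayed\ndef inc(x):\n    return x + 1\n\n@dask.delayed\ndef add(x, y):\n    return x + y\n\n# Nothing is computed yet: Dask only builds a task graph...\ntotal = add(inc(1), inc(2))\n# ...which is executed, in parallel where possible, on compute()\nprint(total.compute())  # 5\n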

    This class builds on the orchestration class, going into further detail on K8S specifics.

    Kubernetes

    Dask presentation

    Students will use GCP for this class. Be sure to stop your cluster after class to conserve GCP credits.

    Additional resources can be found in the dask documentation.

    "},{"location":"2_8_dask.html#deploying-a-dask-hub","title":"Deploying a Dask Hub","text":"

    This material is taken from the following docs:

    • https://docs.dask.org/en/latest/setup/kubernetes-helm.html
    • https://zero-to-jupyterhub.readthedocs.io/en/latest/kubernetes/setup-kubernetes.html
    • https://zero-to-jupyterhub.readthedocs.io/en/latest/kubernetes/setup-helm.html
    "},{"location":"2_8_dask.html#creating-a-kubernetes-cluster","title":"Creating a Kubernetes Cluster","text":"

    First, you need to enable the Kubernetes API if not already done:

    • Go to console.cloud.google.com
    • Select the Kubernetes Engine in the menu
    • Enable the API if not already done.

    Then you'll need a terminal with gcloud and kubectl. The simplest is just to use the Google Cloud Shell from console.cloud.google.com. If you prefer, you can follow the links above to find how to install everything on your computer.

    Ask Google Cloud to create a managed Kubernetes cluster and a default node pool to get nodes from:

    gcloud container clusters create \\\n  --machine-type n1-standard-4 \\\n  --enable-autoscaling \\\n  --min-nodes 1 \\\n  --max-nodes 10 \\\n  --num-nodes 1 \\\n  --zone europe-west1-b \\\n  --cluster-version 1.23 \\\n  dask-hub-k8s\n

    This will take a few minutes (maybe 2 or 3). You can check your clusters with:

    gcloud container clusters list\n

    You can then test if the cluster is running:

    kubectl get node\n

    Then get permissions to perform all administrative actions needed.

    \u26a0\ufe0fDon't forget to replace your email below.\u26a0\ufe0f

    kubectl create clusterrolebinding cluster-admin-binding \\\n  --clusterrole=cluster-admin \\\n  --user=<GOOGLE-EMAIL-ACCOUNT>\n
    "},{"location":"2_8_dask.html#setting-up-helm","title":"Setting up Helm","text":"

    From your Google Cloud Shell or terminal:

    curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash\nhelm list\n

    should return:

    NAME    NAMESPACE       REVISION        UPDATED STATUS  CHART   APP VERSION\n
    "},{"location":"2_8_dask.html#helm-install-a-dask-hub","title":"Helm install a Dask Hub","text":"

    The default Daskhub configuration uses dask-gateway, which is there to handle multiple users with fine-grained authorisations. We don't need this, and it is a slightly more complicated setup than what we'll do.

    Instead, we'll deploy a Daskhub with dask-kubernetes, which assumes some authorisations inside the Pods of the Kubernetes cluster (a potential security risk), but is more straightforward for our usage.

    Verify that you\u2019ve set up a Kubernetes cluster and added Dask\u2019s helm charts:

    helm repo add dask https://helm.dask.org/\nhelm repo update\n

    Generate a token to configure JupyterHub:

    openssl rand -hex 32  > token1.txt\ncat token1.txt\n

    Create the file below (for example using vim or cloud shell editor) and substitute the value.

    # file: daskhub-config.yaml\njupyterhub:\n  proxy:\n    secretToken: \"<token-1>\"\n  scheduling:\n    podPriority:\n      enabled: true\n    userPlaceholder:\n      # Specify three dummy user pods will be used as placeholders\n      replicas: 1\n    userScheduler:\n      enabled: true\n  singleuser:\n    serviceAccountName: daskkubernetes\n    image:\n      name: guillaumeeb/pangeo-ml-notebook # Image to use for singleuser environment. Must include dask-kubernetes.\n      tag: 2021.11.14\n\ndask-gateway:\n  enabled: false\n  gateway:\n    auth:\n      type: simple\n      simple:\n        password: \"unused\"\n\ndask-kubernetes:\n  enabled: true\n

    Now we just install Dask Hub:

    helm upgrade --wait --install --render-subchart-notes \\\n    --namespace daskhub \\\n    --create-namespace \\\n    dhub dask/daskhub \\\n    --values=daskhub-config.yaml\n

    This will again take a few minutes. You can then check the release:

    helm list -n daskhub\n

    Check install and go to Jupyter!

    To get the public IP of your hub deployment:

    kubectl --namespace=daskhub get service proxy-public\n

    Get the external IP and open it in your browser. You should be able to log in with any username/password. Ensure that Dask is working, and the K8S mechanisms too!

    "},{"location":"2_8_dask.html#create-a-dask-kubernetes-cluster","title":"Create a dask-kubernetes cluster","text":"

    Create a yaml file within the Jupyterhub interface:

    # worker-spec.yaml\n\nkind: Pod\nmetadata:\n  labels:\n    foo: bar\nspec:\n  restartPolicy: Never\n  containers:\n  - image: guillaumeeb/pangeo-ml-notebook:2021.11.14\n    imagePullPolicy: IfNotPresent\n    args: [dask-worker, --nthreads, '2', --no-dashboard, --memory-limit, 6GB, --death-timeout, '60']\n    name: dask\n    env:\n      - name: EXTRA_PIP_PACKAGES\n        value: xgboost\n    resources:\n      limits:\n        cpu: \"2\"\n        memory: 6G\n      requests:\n        cpu: \"1.7\"\n        memory: 6G\n

    Just open a notebook in your newly created Dask-enabled hub, and try to copy and paste the following cells:

    Set some config to ease usage.

    import dask\nimport dask.distributed  # populate config with distributed defaults\nimport dask_kubernetes\n\ndask.config.set({\"kubernetes.worker-template-path\": \"worker-spec.yaml\"})\ndask.config.set({\"distributed.dashboard.link\": \"{JUPYTERHUB_SERVICE_PREFIX}proxy/{port}/status\"})\n

    Create a cluster object.

    from dask_kubernetes import KubeCluster\n\ncluster = KubeCluster(deploy_mode='local') # Scheduler is started in the notebook process\ncluster\n

    This should display a fancy widget. You can open the Dask Dashboard from here.

    Now scale the cluster to get Dask-workers and connect to it.

    cluster.scale(20)\n
    from distributed import Client\n\nclient = Client(cluster)\nclient\n

    What is happening in your K8S cluster after a few seconds/minutes? Launch some computation: what about estimating Pi?

    We'll use Dask array, a Numpy extension, for this:

    import dask.array as da\n\nsample = 10_000_000_000  # <- this is huge!\nxxyy = da.random.uniform(-1, 1, size=(2, sample))\nnorm = da.linalg.norm(xxyy, axis=0)\nsumm = da.sum(norm <= 1)\ninsiders = summ.compute()\npi = 4 * insiders / sample\nprint(\"pi ~= {}\".format(pi))\n

    How many workers did you get? Why?

    Now just close the cluster.

    cluster.close()\n

    What happens after a few minutes?

    "},{"location":"2_8_dask.html#deleting-a-kubernetes-cluster","title":"Deleting a Kubernetes Cluster","text":"

    Get your cluster name and zone

    gcloud container clusters list\n

    Delete your kubernetes cluster

    gcloud container clusters delete <YOUR_CLUSTER_NAME> --zone <YOUR_CLUSTER_ZONE>\n
    "},{"location":"2_9_project.html","title":"Project - Dask","text":"

    The evaluation for this class is a Dask notebook. You should run this notebook on a Daskhub using Kubernetes, as in the Dask on Kubernetes class. You should complete the exercises and answer the questions in the notebook, then turn it in through the LMS. You should work in a group of 2 to 4 and split the work equally between responding to questions, managing the infrastructure, and trying out different algorithms. Be sure to include the names of your group members in your submission.

    The notebook is due on March 12, 2024 at 23h59.

    Dask tutorial, if needed

    Evaluation notebook

    LMS depot

    "},{"location":"ctf.html","title":"Data Engineering Fundamentals Capture the Flag","text":"

    This class is a five-day Capture the Flag event to get to know the basics of systems usage, specifically Linux, git, and ssh. There is also a large section on Python, with an emphasis on data science scripting practices using numpy and pandas in Jupyter notebooks.

    This is a self-guided exercise with resources and questions on this site. You, the participant, must look for the answer to the questions through reading documentation, discussing with others, and trying things. Try to avoid searching for answers online in a search engine; the answers can almost always be found in documentation.

    Answers can be submitted through an API with the CTF server. Questions will be made available over the course of 5 sessions. Responding correctly to a question gives 1 point, and an additional 0.5 points are awarded for being the first to submit the correct answer to a question. That half point is the flag - be the first to capture it!

    If you're speeding through the questions, consider helping others learn the material. Depending on your background, you may have varied experience with these tools. Get to know the other participants by helping them capture a flag too.

    "},{"location":"ctf.html#linux","title":"Linux","text":"

    Linux is an open-source operating system based on Unix. It is a standard choice for development and is the most dominant operating system for web servers, cloud computing, and high performance computing at 80% of global public servers. There are many different distributions but they share a common set of tools, notably GNU software. A very common Linux distribution is Android, at 73% of all mobile devices, so you might be a Linux user already without realizing it!

    You most likely don't use Linux as the operating system of your personal computer, however. If you are using one of the 2.5% of personal computers running Linux, you can skip straight to the Submission section.

    MacOS is also based on Unix, so if you're using MacOS, most things should work just as in Linux! A few commands will be different from the course instructions, and the questions will always refer to Linux resources, for example documentation. It is highly recommended to install homebrew (https://brew.sh/) which will allow for package installation via the command line.

    "},{"location":"ctf.html#installation-on-windows","title":"Installation on Windows","text":"

    The easiest way to use Linux on Windows is through the Windows Subsystem for Linux. Installation instructions are here: https://docs.microsoft.com/en-us/windows/wsl/install. Make sure to follow all instructions carefully. If asked to join a \"Windows Insiders Program\", ignore this. By default, this installs Ubuntu, which is good for this systems class and for all of SDD.

    The WSL is similar to a virtual machine inside of Windows, but it integrates with some existing components of Windows. You can access your Windows files from Linux at /mnt/, but you should make sure you're familiar with Linux first.

    • About the WSL
    • WSL FAQ
    • How to Access WSL Linux Files from Windows
    "},{"location":"ctf.html#submission","title":"Submission","text":"

    All questions will be posted to the CTF github repository. In the second class, we will use git to download this repository locally, and it will be used to host the files and data needed to respond to questions.

    The CTF server's IP address is 34.155.94.97. You can see a leaderboard there and it is the address for submitting answers. The first way we'll look at submitting answers is with curl in Linux.

    Once you have a Unix-type environment, either native Linux or macOS, or through the WSL, you're ready to submit to the CTF. You will use the curl command; you can verify that you have curl by running which curl in the command line. curl is a tool for transferring data from or to a server. How do you know that? By checking the documentation of curl using man curl. Try it out!

    To respond to a question, send a POST request with the data of the question number and answer, and your username as user (your username should be your ISAE login, but you can also check on the leaderboard). For example, the first question asks where the curl executable is (hint: use which). Then use curl:

    curl -X POST 'http://34.155.94.97/' \\\n    -d 'number=1' \\\n    -d 'answer=your answer here' \\\n    -d 'user=your username here'\n

    Some of the questions will require access to some files, called file_a.txt, file_b.txt, and file_c.txt. Those are available on the CTF git repository.

    You are ready to start answering questions! If you don't know an answer, check the resources below and read documentation using man.

    You can see which questions you have answered by sending a GET request:

    curl 'http://34.155.94.97/user/d.wilson'\n

    You can also see which questions have remaining flags, the bonus points associated with answering the question for the first time, with a GET request:

    curl 'http://34.155.94.97/answers/'\n
    "},{"location":"ctf.html#python-submission","title":"Python Submission","text":"

    Note that you can use the requests library to submit responses:

    import requests\ndata = {\"number\": \"1\",\n        \"answer\": \"\",\n        \"user\": \"d.wilson\"}\nr = requests.post(\"http://34.155.94.97/\", data=data)\n
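
    You can also check your progress from Python, mirroring the curl GET request above (replace the username with your own):

    import requests\n\n# Same endpoint as the curl example: questions already answered by a user\nr = requests.get(\"http://34.155.94.97/user/d.wilson\")\nprint(r.text)\n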
    "},{"location":"ctf.html#bash-resources","title":"Bash Resources","text":"
    • ISAE class on CLI, Linux, and Bash
    • Shell class from MIT
    • Bash exercises
    • More bash exercises
    • Short exercises in regular expressions
    "},{"location":"ctf.html#linux-tools","title":"Linux tools","text":"

    Now that you're an expert in Linux, let's quickly look at some useful tools. You may need to install some of these, either using apt, brew, yum, pacman, or whichever package manager you use. Linux comes with many programs installed by default, especially distributions like Ubuntu, however the tools in this section will be more useful than the base Linux tools. We'll cover four: apt for package management, top for system monitoring, tmux for terminal management, and vim for file editing. There are alternatives to all of these programs that are great, but it is worth being familiar with these four.

    "},{"location":"ctf.html#linux-resources","title":"Linux Resources","text":"
    • apt manual
    • Alternatives to top
    • Guide to tmux
    • tmux cheat sheet
    • Editors from MIT class
    • Vim adventures
    • tldr, short man pages
    "},{"location":"ctf.html#git","title":"Git","text":"

    Git is a version control system used worldwide for maintaining code, documents, video games, and much more. It has seen wide adoption with servers like Github and Gitlab while being an open-source tool that anyone can install as a client or server. In this class, we will look at repositories hosted on Github, but git is much larger than that and many organizations like ISAE have their own private git server.

    "},{"location":"ctf.html#installation","title":"Installation","text":"

    If you're using Ubuntu, chances are you already have git. If not, simply do:

    sudo apt install git

    These questions concern two repositories: the Machine Learning class in SDD (https://github.com/SupaeroDataScience/machine-learning) and the Seaborn library, a popular graphing library (https://github.com/mwaskom/seaborn). You will need to download both repositories. First choose a directory to host them in, for example ~/SDD/FSD312:

    mkdir -p ~/SDD/FSD312\ncd ~/SDD/FSD312\n

    and then download them using git clone:

    git clone https://github.com/SupaeroDataScience/machine-learning.git\ngit clone https://github.com/mwaskom/seaborn.git\n

    The commit for all questions on the seaborn repository is 1e6739:

    git checkout 1e6739\n
    "},{"location":"ctf.html#git-resources","title":"Git Resources","text":"
    • Git course
    • Introduction to github
    • Github video course
    • Learn git branching
    • Git SCM book
    • Git cheat sheet
    "},{"location":"ctf.html#git-exercise","title":"Git Exercise","text":"

    In order to access the server for the next parts of the CTF, you will need to provide your public ssh key. The SSH section has references explaining public-key cryptography, but in general you will make a key pair with a private side and public side. You will give the public side to services like this class or Github to perform secure communication, keeping your private key secret to prove that it is you.

    First, start by making a key pair and uploading your public key to Github. This will allow you to use SSH for push operations, instead of using a personal access token. Create an SSH key and add it to your Github account.

    Then, we will use git as a way for you to transfer your public key to the class. We could use another means, like a USB key, email, or a very large QR code, but for this exercise we will use git. First make a fork of the https://github.com/SupaeroDataScience/ctf2024 repository. Then, make a pull request with your key as a file in keys/. Please name your key with your name, like the example keys/dennis-wilson.pub. Be sure to upload only your public key. Do not ever upload your private key to public servers.

    Once your key is in the repository, you are ready for the SSH and Python portions of the CTF.

    "},{"location":"ctf.html#ssh","title":"SSH","text":"

    For the ssh section, you will connect to the CTF server to answer questions about the remote environment. Your public key must be uploaded to the git repository above to get access to the server. You will use the corresponding private key to access the server. Your user on the server is ctf and the IP is the same as the CTF webserver: 34.155.94.97.

    Please note that the ISAE-EDU and wired ethernet networks block ssh to most servers, including this one and github.com. In order to ssh to the server, you will need to use either the eduroam network or a different network such as a mobile hotspot.

    "},{"location":"ctf.html#ssh-resources","title":"SSH Resources","text":"
    • Ubuntu ssh manual
    • Guide in French
    • Cryptographie Asym\u00e9trique
    • How SSH works
    "},{"location":"ctf.html#python","title":"Python","text":"

    An overview and reminder of the python programming language, with a focus on numpy and pandas manipulation using Jupyter.

    "},{"location":"ctf.html#installation_1","title":"Installation","text":"

    You most likely have python installed on your Linux system, but it is worthwhile to make sure and to upgrade. Python 3.8, 3.9, or 3.10 are all supported.

    sudo apt install python3\n

    It is highly recommended to make a virtual environment to manage your python packages. There are three main libraries for virtual environments:

    • Virtualenv
    • Pipenv
    • Conda

    Virtualenv is recommended for new users on Linux. Conda, or the Anaconda platform, can be useful on Windows as many packages are built specifically for Windows, but not all packages are available via conda. Pipenv is an exciting project aimed at Python developers, but it adds additional complexity.

    Once you have a virtual environment created, please install the following packages for the rest of the Seminars class:

    numpy\npandas\nscipy\nmatplotlib\njupyter\n

    The following packages will also be used in SDD:

    seaborn\nscikit-learn\nkeras\ntorch\ngeos\ngraphviz\nnltk\nnetworkx\nstatsmodels\npyspark\ncython\ncma\ngym\n
    "},{"location":"ctf.html#jupyter","title":"Jupyter","text":"

    Jupyter (the name stands for the three original languages in the project: Julia, Python, and R) is a way to use and develop code interactively in the browser. Once you've installed the jupyter package, you can run a Jupyter notebook by simply running jupyter notebook.

    For Windows users, you can run Jupyter in the WSL. As explained in this blog post, you simply need to execute jupyter notebook --no-browser on the WSL and then copy and paste the URL and token generated into a Windows browser.

    Some additional packages for improving Jupyter are nbopen, nbdime, and RISE. Be sure to read their documentation before installing to verify whether these are relevant to you.

    "},{"location":"ctf.html#python-resources","title":"Python Resources","text":"
    • Python 3 Documentation
    • Pip documentation
    • Pandas cheatsheet
    • Stanford Python and Numpy tutorial
    • Python seminar
    • Google Colab: Jupyter notebooks on the cloud
    • Binder: Also Jupyter notebooks on the cloud, not hosted by Google
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"index.html","title":"Data Engineering","text":"

    The amount of data in the world, the form these data take, and the ways to interact with data have all increased exponentially in recent years. The extraction of useful knowledge from data has long been one of the grand challenges of computer science, and the dawn of \"big data\" has transformed the landscape of data storage, manipulation, and analysis. In this module, we will look at the tools used to store and interact with data.

    The objective of this class is that students gain:

    • First hand experience with and detailed knowledge of computing models, notably cloud computing
    • An understanding of distributed programming models and data distribution
    • Broad knowledge of many databases and their respective strengths

    As a part of the Data and Decision Sciences Master's program, this module aims specifically at providing the tool set students will use for data analysis and knowledge extraction using skills acquired in the Algorithms of Machine Learning and Digital Economy and Data Uses classes.

    "},{"location":"index.html#class-structure","title":"Class structure","text":"

    The class is structured in four parts:

    "},{"location":"index.html#data-engineering-fundamentals","title":"Data engineering fundamentals","text":"

    In this primer class, students will cover the basics of Linux command line usage, git, ssh, and data manipulation in python. The format of this class is an interactive capture-the-flag event.

    "},{"location":"index.html#data-storage","title":"Data storage","text":"

    This module covers Database Management Systems with a focus on SQL systems. For evaluation, students will install and manipulate data in PostgreSQL and MongoDB and compare the two systems.

    "},{"location":"index.html#data-computation","title":"Data computation","text":"

    A technical overview of the computing platforms used in the data ecosystem. We will briefly cover cluster computing and then go in depth on cloud computing, using Google Cloud Platform as an example. Finally, a class on GPU computing will be given in coordination with the deep learning section of the AML class.

    "},{"location":"index.html#data-distribution","title":"Data distribution","text":"

    In the final module, we cover the distribution of data, with a focus on distributed programming models. We will introduce functional programming and MapReduce, then use these concepts in a practical session on Spark. Finally, students will do a graded exercise with Dask.

    "},{"location":"0_1_databases.html","title":"Data Storage","text":"

    In this module on databases, database management systems will be covered. A basic understanding of SQL is considered as a prerequisite, and students can refer to the slides and additional resources if needed. For evaluation, students will install and explore the advantages of different DBMSs as a graded project.

    In this first class, we introduce the basics of database management systems and cover high level DBMS functionality.

    Slides

    For the next class, students should install PostgreSQL and MongoDB on their local machines.

    "},{"location":"0_1_databases.html#additional-resources","title":"Additional Resources","text":"
    • PostgreSQL documentation
    • MongoDB documentation
    • SQLBolt - SQL exercises
    • Databases introduction (fr)
    • A comprehensive overview of database systems (en)
    "},{"location":"0_2_ETL.html","title":"Extract, Transform, Load (ETL)","text":"

    In this module on ETL, we will cover the fundamental concepts and practices of data integration and processing. A basic understanding of databases and SQL is considered a prerequisite. Students can refer to the slides and additional resources if they need to refresh their knowledge. For evaluation, students will design and implement an ETL pipeline as a graded project.

    In this first class, we introduce the basics of ETL processes and cover high-level ETL functionality and tools.

    Slides

    "},{"location":"0_3_dbms.html","title":"Evolution of Data Management Systems","text":""},{"location":"0_3_dbms.html#fundamental-concepts-methods-and-applications","title":"Fundamental Concepts, Methods and Applications","text":"

    In this three part class, students will cover the history of data management systems, from file systems to databases to distributed cloud storage. This class is given over the length of the Data Engineering course. Questions from the first two parts are integrated into the exam on cloud computing, and questions from the Cloud DMS section are integrated into the Dask notebook evaluation.

    "},{"location":"0_3_dbms.html#objectives","title":"Objectives","text":"

    The objectives of this course are to:

    • Introduce the fundamental concepts
    • Describe, concisely, the main characteristics of the evolution of DMS (Data Management Systems)
    • Highlight the targeted application classes.

    "},{"location":"0_3_dbms.html#key-words","title":"Key Words","text":"

    Data Management Systems, Uni-processor DBMS, Parallel DBMS, Data Integration Systems, Big Data, Cloud Data Management Systems, High Performance, Scalability, Elasticity, Multi-store/Poly-store Systems

    "},{"location":"0_3_dbms.html#targeted-skills","title":"Targeted Skills","text":"
    • Effectively exploit a DMS according to the environment (uniprocessor, parallel, distributed, cloud) with a view to supporting decision-making within an organization.
    • Be able to choose an appropriate DMS across multiple environments so that an organization's applications function optimally
    "},{"location":"0_3_dbms.html#indicative-program","title":"Indicative Program","text":"
    1. Introduction to Main Problems of Data Management

      • From File Management Systems FMS to Database MS DBMS
      • Motivations, Objectives, Organizations & Drawbacks
      • Databases & Rel. DBMS: Motivations & Objectives
      • Resources:
        • Introduction
        • SGF - File Systems
        • Views - Relational Systems
        • File Organization
    2. Parallel Database Systems

      • Objectives and Parallel Architecture Models
      • Data Partitioning Strategies
      • Parallel Query Processing
      • Resources:
        • Parallel DBMS
        • Parallel Queries
        • Systems DB Parallel
    3. From Distributed DB to Data Integration Systems DIS

      • An Ex. of DDB, Motivations & Objectives
      • Designing of DDB
      • Distributed Query Processing
      • An Ex. of DIS
      • Motivations & Objectives
      • Mediator-Adapters Architecture
      • Design of a Global Schema (GAV, LAV)
      • Query Processing Methodologies
      • Resources:
        • Distributed DBMS - Chapter 1
        • Distributed DBMS - Chapter 2
        • Distributed DBMS - Chapter 3
        • Systems for integrating heterogeneous and distributed data
        • Integration Systems complement
        • Distributed DBMS Dec 2023
    4. Cloud Data Management Systems CDMS

      • Motivations and Objectives
      • Main Characteristics of Big Data and CDMS
      • Classification of Cloud Data Management Systems CDMS
      • Advantages and Weakness of Parallel RDBMS and CDMS
      • Comparison between Parallel RDBMS and CDMS
      • Introduction to Multi-store/Polystore Systems
      • Resources:
        • Cloud Systems
        • MapReduce examples
    5. Conclusion

      • Maturity of Cloud DMS
      • Key Criteria for Choosing a Data Management System
    "},{"location":"0_3_dbms.html#additional-reading","title":"Additional Reading","text":"
    1. Principles of Distributed Database Systems, M. Tamer Ozsu and Patrick Valduriez; Springer-Verlag ; Fourth Edition, December 2019.

    2. Data Management in the Cloud: Challenges and Opportunities Divyakant Agrawal, Sudipto Das, and Amr El Abbadi; Synthesis Lectures on Data Management, December 2012, Vol. 4, No. 6 , Pages 1-138.

    3. Query Processing in Parallel Relational Database Systems; H. Lu, B.-C Ooi and K.-L. Tan; IEEE Computer Society Press, CA, USA, 1994.

    4. Traitement parall\u00e8le dans les bases de donn\u00e9es relationnelles : concepts, m\u00e9thodes et applications Abdelkader Hameurlain, Pierre Bazex, Franck Morvan; C\u00e9padu\u00e8s Editions, Octobre 1996.

    "},{"location":"0_3_postgres.html","title":"PostgeSQL","text":"

    In this practical session, we cover many examples of database queries with the popular DBMS PostgreSQL.

    Based on the TP by Christophe Garion, CC BY-NC-SA 2015.

    "},{"location":"0_3_postgres.html#setup","title":"Setup","text":"

    Before class, please install PostgreSQL and pgAdmin.

    "},{"location":"0_3_postgres.html#postgresql-installation","title":"PostgreSQL installation","text":"

    For this session, students should install PostgreSQL (v9 or higher) and pgAdmin (v4). Follow the installation instructions and make sure you have an initial database set up and the postgresql service running.

    • Installation on Ubuntu
    • Installation on Mac OS
    • Installation on Arch Linux
    • Installation on Windows Subsystem for Linux
    • Installation on Windows (and add the PostgreSQL binaries to your path)

    Additionally, add your login user as a postgresql superuser to enable database creation with your user:

    # bash shell in Linux or OSX\n$ sudo su -l postgres\n[postgres]$ createuser --interactive\n
    "},{"location":"0_3_postgres.html#pgadmin","title":"pgAdmin","text":"

    You can do all exercises directly through the psql shell for this class. However, it is useful to have a graphical confirmation of the database configuration. pgAdmin is one of many front-ends for Postgres. Install it by following the instructions on the pgAdmin site.

    "},{"location":"0_3_postgres.html#setup-database-creation","title":"Setup - database creation","text":"

    Once you've installed and configured PostgreSQL, create the first exercise database:

    # bash shell in Linux or OSX or windows powershell\n$ createdb db-mexico86\n

    You can also do this through an SQL shell:

    # SQL shell\npostgres=# CREATE DATABASE \"db-mexico86\";\n

    Confirm with pgAdmin that your database db-mexico86 was created. If you don't have any servers, create one by right-clicking. The host address is 127.0.0.1 and the maintenance database and username should be postgres.

    In pgAdmin, if you are asked for a password and don't know what your password is, you can reset the password of the postgres user:

    change password
    postgres=# ALTER USER postgres WITH PASSWORD 'newpassword';\n
    "},{"location":"0_3_postgres.html#mexico86-database-simple-queries","title":"Mexico86 database - simple queries","text":"

    This database contains data from the 1986 football World Cup.

    You can download the database creation script individually:

    $ wget https://raw.githubusercontent.com/SupaeroDataScience/DE/master/scripts/mexico86/create-tables-std.sql\n

    Or git clone the class repository and navigate to the creation and insertion scripts.

    Once you have the scripts, run the database creation script in the mexico86 folder.

    # bash shell in Linux or OSX, or windows powershell\n$ psql -d db-mexico86 -f mexico86/create-tables-std.sql\n

    If that doesn't work, you can copy the script into the Query Tool in pgAdmin.

    Exercise 1.1: Look at the database creation scripts. What are the tables being created? What are their fields? Which fields are keys? Confirm these values in pgAdmin.

    Response
    Pays: (nom, groupe)
    Typematch: (type)
    Match: (paysl, paysv, butsl, butsv, type, date)

    You should be able to make queries now. You can either use PostgreSQL in interactive mode by running

    $ psql -d db-mexico86\n

    or write your solutions in an SQL file and run the file:

    $ echo \"SELECT groupe FROM pays;\" > a.sql\n$ psql -d db-mexico86 -f a.sql\n

    You can also use the Query Editor in pgAdmin for a graphical interface.

    Exercise 1.2: Write a query which lists the countries participating in the World Cup.

    Response
            nom \n---------------------\nArgentine\nItalie\nBulgarie\nCor\u00e9e\nMexique\nParaguay\nBelgique\nIrak\nURSS\nHongrie\nFrance\nCanada\nBr\u00e9sil\nEspagne\nIrlande du Nord\nAlg\u00e9rie\nDanemark\nRFA\nUruguay\n\u00c9cosse\nMaroc\nAngleterre\nPologne\nPortugal\n(24 rows)\n

    Exercise 1.3: Write a query which lists all matches as a pair of countries per match.

    Response
            paysl        |        paysv \n---------------------|---------------------\nBulgarie            | Italie\nArgentine           | Cor\u00e9e\nItalie              | Argentine\nCor\u00e9e               | Bulgarie\nCor\u00e9e               | Italie\nArgentine           | Bulgarie\nBelgique            | Mexique\nParaguay            | Irak\nMexique             | Paraguay\nIrak                | Belgique\nIrak                | Mexique\nParaguay            | Belgique\nCanada              | France\nURSS                | Hongrie\nFrance              | URSS\nHongrie             | Canada\nURSS                | Canada\nHongrie             | France\nEspagne             | Br\u00e9sil\nAlg\u00e9rie             | Irlande du Nord\nBr\u00e9sil              | Alg\u00e9rie\nIrlande du Nord     | Espagne\nIrlande du Nord     | Br\u00e9sil\nAlg\u00e9rie             | Espagne\nUruguay             | RFA\n\u00c9cosse              | Danemark\nDanemark            | Uruguay\nRFA                 | \u00c9cosse\n\u00c9cosse              | Uruguay\nDanemark            | RFA\nMaroc               | Pologne\nPortugal            | Angleterre\nAngleterre          | Maroc\nPologne             | Portugal\nAngleterre          | Pologne\nMaroc               | Portugal\nBr\u00e9sil              | Pologne\nFrance              | Italie\nMaroc               | RFA\nMexique             | Bulgarie\nArgentine           | Uruguay\nAngleterre          | Paraguay\nURSS                | Belgique\nEspagne             | Danemark\nBr\u00e9sil              | France\nRFA                 | Mexique\nArgentine           | Angleterre\nBelgique            | Espagne\nFrance              | RFA\nArgentine           | Belgique\nRFA                 | Argentine\n(51 rows)\n

    Exercise 1.4: Write a query which lists the matches which took place on June 5, 1986.

    Response
            paysl        |   paysv\n---------------------|-----------\nItalie              | Argentine\nCor\u00e9e               | Bulgarie\nFrance              | URSS\n(3 rows)\n

    Exercise 1.5: Write a query which lists the countries which France played against (hint, France could have played either side).

    Response
    pays\n---------\nBr\u00e9sil\nCanada\nHongrie\nItalie\nRFA\nURSS\n(6 rows)\n

    Exercise 1.6: Write a query which returns the winner of the World Cup

    Response
    pays\n-----------\nArgentine\n(1 row)\n
    "},{"location":"0_3_postgres.html#beer-database","title":"Beer database","text":"

    We'll now use a database which tracks the beers that a group of friends enjoy. Create the database and populate it using the provided scripts.

    $ createdb db-beer\n$ psql -d db-beer -f beer/create-tables-std.sql\n$ psql -d db-beer -f beer/insert.sql\n

    Exercise 2.1: Look at the database creation scripts. What are the tables being created? What are their fields? Which fields are keys? Confirm these values in pgAdmin.

    Response
    Frequente: (buveur, bar)
    Sert: (bar, biere)
    Aime: (buveur, biere)

    Write queries which respond to the following questions. Hint: understanding natural joins may help (see the example below).
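    If you have never used a natural join, here is a minimal, purely illustrative example (assuming the sert and aime tables created by the script above): a natural join combines rows that share the same values in their common columns, here biere.

    -- every (drinker, bar, beer) combination where the bar serves a beer the drinker likes\nSELECT buveur, bar, biere FROM sert NATURAL JOIN aime;\n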

    Exercise 2.2 What is the list of bars which serve the beer that Martin likes?

    Response
            bar \n-------------------\n Ancienne Belgique\n La Tireuse\n Le Filochard\n(3 rows)\n

    Exercise 2.3 What is the list of drinkers who go to at least one bar which serves a beer they like?

    Response
     buveur \n--------\n Bob\n David\n Emilie\n Martin\n(4 rows)\n

    Exercise 2.4 What is the list of drinkers who don't go to any bars which serve the beer they like?

    Response
     buveur \n--------\n Cecile\n Alice\n(2 rows)\n
    "},{"location":"0_3_postgres.html#complex-queries-mexico-database","title":"Complex queries - Mexico database","text":"

    Exercise 3.1: Create a table with an entry for each match which lists the total number of goals (scored by either side), the match type, and the date. As we'll use this table later on, create a VIEW called \"matchbutsglobal\" with this information.
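    If you have never created a VIEW, the general syntax is shown below on a hypothetical example (a small view over the pays table, unrelated to the expected solution); a view is a named, stored query that can afterwards be used like a table.

    CREATE VIEW paysgroupea AS\n    SELECT nom FROM pays WHERE groupe = 'A';\n\nSELECT * FROM paysgroupea;\n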

    Response
            paysl        |        paysv        | buts |  type  |    date \n---------------------+---------------------+------+--------+------------\n URSS                | Belgique            |    7 | 1/8    | 1986-06-15\n France              | Italie              |    2 | 1/8    | 1986-06-17\n Maroc               | Pologne             |    0 | Poule  | 1986-06-02\n RFA                 | Argentine           |    5 | Finale | 1986-06-29\n Br\u00e9sil              | France              |    2 | 1/4    | 1986-06-21\n Italie              | Argentine           |    2 | Poule  | 1986-06-05\n Maroc               | Portugal            |    4 | Poule  | 1986-06-11\n Br\u00e9sil              | Alg\u00e9rie             |    1 | Poule  | 1986-06-06\n Paraguay            | Belgique            |    4 | Poule  | 1986-06-11\n Hongrie             | France              |    3 | Poule  | 1986-06-09\n Irak                | Belgique            |    3 | Poule  | 1986-06-08\n Danemark            | RFA                 |    2 | Poule  | 1986-06-13\n Irlande du Nord     | Espagne             |    3 | Poule  | 1986-06-07\n Alg\u00e9rie             | Irlande du Nord     |    2 | Poule  | 1986-06-03\n RFA                 | Mexique             |    0 | 1/4    | 1986-06-21\n URSS                | Hongrie             |    6 | Poule  | 1986-06-02\n Mexique             | Paraguay            |    2 | Poule  | 1986-06-07\n Belgique            | Espagne             |    2 | 1/4    | 1986-06-22\n Irak                | Mexique             |    1 | Poule  | 1986-06-11\n Espagne             | Br\u00e9sil              |    1 | Poule  | 1986-06-01\n Angleterre          | Maroc               |    0 | Poule  | 1986-06-06\n Irlande du Nord     | Br\u00e9sil              |    2 | Poule  | 1986-06-12\n Maroc               | RFA                 |    1 | 1/8    | 1986-06-17\n Belgique            | Mexique             |    3 | Poule  | 1986-06-03\n Bulgarie            | Italie              |    2 | Poule  | 1986-05-31\n \u00c9cosse              | Uruguay             |    0 | Poule  | 1986-06-13\n Alg\u00e9rie             | Espagne             |    3 | Poule  | 1986-06-12\n Argentine           | Belgique            |    2 | 1/2    | 1986-06-25\n Br\u00e9sil              | Pologne             |    4 | 1/8    | 1986-06-16\n Danemark            | Uruguay             |    7 | Poule  | 1986-06-08\n Cor\u00e9e               | Italie              |    5 | Poule  | 1986-06-10\n Canada              | France              |    1 | Poule  | 1986-06-01\n Argentine           | Uruguay             |    1 | 1/8    | 1986-06-16\n France              | RFA                 |    2 | 1/2    | 1986-06-25\n France              | URSS                |    2 | Poule  | 1986-06-05\n Uruguay             | RFA                 |    2 | Poule  | 1986-06-04\n Angleterre          | Pologne             |    3 | Poule  | 1986-06-11\n Portugal            | Angleterre          |    1 | Poule  | 1986-06-03\n \u00c9cosse              | Danemark            |    1 | Poule  | 1986-06-04\n Angleterre          | Paraguay            |    3 | 1/8    | 1986-06-18\n Hongrie             | Canada              |    2 | Poule  | 1986-06-06\n Argentine           | Cor\u00e9e               |    4 | Poule  | 1986-06-02\n Pologne             | Portugal            |    1 | Poule  | 1986-06-07\n RFA                 | \u00c9cosse              |    3 | Poule  | 1986-06-08\n Mexique             | Bulgarie            |    2 | 1/8    | 1986-06-15\n URSS                | Canada              |    2 | 
Poule  | 1986-06-09\n Espagne             | Danemark            |    6 | 1/8    | 1986-06-18\n Paraguay            | Irak                |    1 | Poule  | 1986-06-04\n Argentine           | Bulgarie            |    2 | Poule  | 1986-06-10\n Argentine           | Angleterre          |    3 | 1/4    | 1986-06-22\n Cor\u00e9e               | Bulgarie            |    2 | Poule  | 1986-06-05\n(51 rows)\n

    Exercise 3.2: Write a query which calculates the number of goals scored on average in all the matches of the French team.

    Response
        Moyenne buts\n--------------------\n 2.0000000000000000\n(1 row)\n

    Exercise 3.3: Write a query which calculates the total number of goals scored only by the French team.

    Response
     buts \n------\n    8\n(1 row)\n

    Exercise 3.4: Write a query which calculates the total number of goals scored in each Poule match. Order the results by group.

    Response
     groupe | sum \n--------+-----\n A      |  17\n B      |  14\n C      |  16\n D      |  12\n E      |  15\n F      |   9\n(6 rows)\n

    Exercise 3.5: Write a function vainqueur which takes in the two countries of a match and the match type and which returns the winner. Apply your function to the following pairs:

    SELECT * FROM vainqueur('Espagne', 'Danemark', '1/8');\nSELECT * FROM vainqueur('Br\u00e9sil', 'France', '1/4');\n
    Response
     vainqueur \n-----------\n Espagne\n(1 row)\n\n vainqueur \n-----------\n Match nul\n(1 row)\n
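    If you have never written a PostgreSQL function, the sketch below is a minimal, purely illustrative PL/pgSQL example (unrelated to the exercise) showing the general structure: arguments, a return type, a body between $$ delimiters and LANGUAGE plpgsql.

    -- toy function returning the larger of its two arguments\nCREATE OR REPLACE FUNCTION plus_grand(a INTEGER, b INTEGER)\nRETURNS INTEGER AS $$\nBEGIN\n    IF a >= b THEN\n        RETURN a;\n    ELSE\n        RETURN b;\n    END IF;\nEND;\n$$ LANGUAGE plpgsql;\n\nSELECT plus_grand(3, 7);\n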

    Exercise 3.6: Write a function butsparequipe which returns the total and the average number of goals scored by a team. Apply your function to the French team. Bonus points for making the result display the name of the team.

    SELECT * FROM butsparequipe('France');\n
    Response
      pays  | total |      moyenne \n--------+-------+--------------------\n France |     8 | 1.3333333333333333\n(1 row)\n

    Exercise 3.7: Using the butsparequipe function, write a query which lists all countries and the goals they scored.

    Response
            pays         | total \n---------------------+-------\n Argentine           |    14\n Italie              |     5\n Bulgarie            |     2\n Cor\u00e9e               |     4\n Mexique             |     6\n Paraguay            |     4\n Belgique            |    10\n Irak                |     1\n URSS                |    12\n Hongrie             |     2\n France              |     8\n Canada              |     0\n Br\u00e9sil              |     9\n Espagne             |    11\n Irlande du Nord     |     2\n Alg\u00e9rie             |     1\n Danemark            |    10\n RFA                 |     8\n Uruguay             |     2\n \u00c9cosse              |     1\n Maroc               |     3\n Angleterre          |     7\n Pologne             |     1\n Portugal            |     2\n(24 rows)\n

    Exercise 3.8: Using the butsparequipe function, write a query which shows the country which scored the most goals and the number of goals they scored.

    Response
       pays    | total \n-----------+-------\n Argentine |    14\n(1 row)\n
    "},{"location":"0_3_postgres.html#pull-the-trigger","title":"Pull the trigger","text":"

    In this exercise, we're going to create a TRIGGER, a mechanism which allows for automatically executing actions when an event occurs.

    Create the db-trigger database.

    $ createdb db-trigger\n

    Exercise 4.1: Create a table rel(nom, valeur) where nom is a string of characters and valeur is an integer. nom will be the primary key.

    Solution
    CREATE TABLE IF NOT EXISTS rel (\n    nom VARCHAR(20),\n    valeur INTEGER,\n    PRIMARY KEY (nom)\n);\n

    Exercise 4.2: Add 5 tuples into the table

    Solution
    INSERT INTO rel VALUES\n       ('Alice', 10),\n       ('Bob', 5),\n       ('Carl', 20),\n       ('Denise', 11),\n       ('Esther', 6);\n

    Exercise 4.3: Write a trigger such that, when adding new tuples, the average of valeur cannot decrease. If a new tuple is added which would decrease the average, an exception should be raised.

    The following insertion should work:

    INSERT INTO rel VALUES ('Fab', 15);\n\nSELECT * FROM rel;\n

    As we can see, the (Fab, 15) tuple was added:

      nom   | valeur \n--------+--------\n Alice  |     10\n Bob    |      5\n Carl   |     20\n Denise |     11\n Esther |      6\n Fab    |     15\n(6 rows)\n

    However, the following insertion should give an exception:

    INSERT INTO rel VALUES ('Guy', 2);\n
    Solution
    CREATE OR REPLACE FUNCTION verifier_moyenne()\n                  RETURNS trigger AS $verifier_moyenne$\n    DECLARE\n      moyenne FLOAT;\n      nb      INTEGER;\n    BEGIN\n        moyenne := AVG(valeur) FROM rel;\n        nb := COUNT(*) FROM rel;\n\n        IF ((nb * moyenne + NEW.valeur) / (nb + 1)) < moyenne THEN\n            RAISE EXCEPTION 'problem with insertion: valeur average is decreasing!';\n        END IF;\n\n        RETURN NEW;\n    END;\n$verifier_moyenne$ LANGUAGE plpgsql;\n\nCREATE TRIGGER VerificationMoyenne\nBEFORE INSERT ON rel\nFOR EACH ROW\nEXECUTE PROCEDURE verifier_moyenne();\n
    "},{"location":"0_4_project.html","title":"Databases Project","text":"

    This project is detailed in the ETL class.

    You are part of a 4-person data engineering team at a startup, tasked with designing and implementing an ETL/ELT pipeline. Your assignment is to submit a 2-4 page report detailing the choices made for the ETL/ELT pipeline and to provide a demo of an example database.

    In your report, you need to clearly explain and justify your decisions for each phase of the pipeline:

    1. Extract (E): Identify and explain where the data is coming from. Discuss the sources and why they were chosen.

    2. Transform (T): Explain how the data is being transformed. Describe the processes, tools, and techniques used to clean, aggregate, or modify the data to make it useful for its intended purpose.

    3. Load (L): Detail how the data is loaded into the system, how it is stored, and how it will be used or queried. Discuss the database or storage options chosen, and explain how the data will be utilized by the organization or application.

    Along with the report, you are expected to provide a demo of an example database. You can use PostgreSQL, MongoDB, or another database system of your choice. The demo should include:

    • Documented scripts to load and manipulate example data that demonstrates the choices made for the ETL pipeline.
    • The data used in the demo does not need to be exhaustive, but it should be sufficient to illustrate the key decisions in the ETL process.
    "},{"location":"0_4_project.html#grading-criteria","title":"Grading Criteria:","text":"
    • Report Rigor (6 points): Depth and thoroughness in explaining your ETL/ELT choices.
    • Report Clarity (6 points): How clearly and effectively your report communicates the ETL/ELT pipeline.
    • Demo Data (4 points): Appropriateness and accuracy of the example data used in the demo.
    • Demo Manipulation (4 points): Functionality and quality of the data manipulation demonstrated in the example.
    "},{"location":"0_4_project.html#deadline","title":"Deadline:","text":"
    • The report and demo must be submitted by October 11, 2024, end of day to the LMS.
    "},{"location":"1_1_overview.html","title":"Data Computation Part 1: Cloud Computing, Containers & Deployment","text":""},{"location":"1_1_overview.html#syllabus","title":"Syllabus","text":""},{"location":"1_1_overview.html#introduction","title":"Introduction","text":"

    Introduction to data computation module

    Link to slides

    "},{"location":"1_1_overview.html#cloud-computing-remote-development-3h","title":"Cloud Computing & Remote Development (3h)","text":"

    Intro to cloud computing & remote development environments

    "},{"location":"1_1_overview.html#google-cloud-platform-3h","title":"Google Cloud Platform (3h)","text":"

    Discover Google Cloud Platform with your student credits !

    "},{"location":"1_1_overview.html#containers-3h","title":"Containers (3h)","text":"

    Intro to containers & docker

    "},{"location":"1_1_overview.html#be-gcp-containers-3h","title":"BE : GCP & Containers (3h)","text":"

    A small workshop that puts everything together: Google cloud & docker

    "},{"location":"1_1_overview.html#be-deploy-your-ml-model-in-production-3h","title":"BE : Deploy your ML model in production (3h)","text":"

    Deploy your machine learning model in production with everything you've learnt

    We will then switch to the introduction to orchestration and kubernetes lectures

    "},{"location":"1_1_overview.html#quiz-and-recap","title":"Quiz and recap","text":"

    The evaluation of this section will be done with an open-resource quiz covering all cloud computing topics.

    The conclusion slides should be used to recap the previous courses.

    "},{"location":"1_2_cloud.html","title":"Cloud Computing & Remote Development Environment","text":""},{"location":"1_2_cloud.html#cloud-computing","title":"Cloud Computing","text":"

    Link to slides

    "},{"location":"1_2_cloud.html#remote-development","title":"Remote Development","text":"

    Link to slides

    "},{"location":"1_2_setup_codespace.html","title":"Remote Development hands-on","text":""},{"location":"1_2_setup_codespace.html#1-abstract","title":"1. Abstract","text":"

    Abstract

    In this hands-on you will start to manipulate a GitHub Codespaces remote development environment, to get familiar with manipulating code and data that are not stored on your computer. We will also discover streamlit, a python library used to build frontends, and see how to preview things running in the codespace from your machine.

    Warning

    Some things may only work on eduroam or in 4G... Some things may only work on Google Chrome

    Warning

    Don't forget to shutdown everything when you're done !

    Note

    When the TP says to replace \"{something}\" with a name, don't include the brackets, so write \"yourname\"

    "},{"location":"1_2_setup_codespace.html#1-my-first-virtual-machine-github-codespaces","title":"1. My first \"Virtual Machine\", Github Codespaces","text":"

    First, you will need a GitHub account. You should already have one, otherwise create one.

    "},{"location":"1_2_setup_codespace.html#intro-to-github-codespaces","title":"Intro to Github Codespaces","text":"
    • Github Codespaces is a \"managed VM\" made available to develop without needing to configure your environment locally.
    • Compared to configuring a VM by yourself, this one comes preloaded with developer tools, and is thus faster to use.
    • You have a free tier of 60 CPU hours / month and some disk space
    • You pay for the CPU when the VM is ON and for the disk as long as the codespace exists

    Have a look at the overview : https://docs.github.com/en/codespaces/overview

    Question

    • Can you describe it with your own words ?
    • How would ChatGPT (or any LLM) describe it ?

    Note

    Google Cloud has a similar service with Google Cloud Shell but since Codespaces is way more powerful, we will be using that

    "},{"location":"1_2_setup_codespace.html#create-your-codespace-and-connect-to-it","title":"Create your codespace and connect to it","text":"

    Go to https://github.com/fchouteau/isae-cloud-computing-codespace

    • Click on the top left corner for a new codespace
    • It should launch a VS Code instance in your browser
    • Launch a terminal using the top right menu

    If that does not work, go to https://github.com/github/codespaces-blank and create a codespace from there

    You should arrive at a VS Code instance

    Question

    • Where is it running ?

    If you go to the main page of https://github.com/codespaces you should see your codespace running

    "},{"location":"1_2_setup_codespace.html#explore-github-codespaces","title":"Explore github codespaces","text":"

    Github Codespace Getting Started

    Identify the following features in the interface

    Code editor (e.g., VS Code)\nTerminal\nFile explorer\nDebugging tools (e.g., breakpoints, console output)\n

    You can then run these commands in order to get a feel for the \"computer\" behind it

    • Check available disk space
    Bash command to run

    df -h

    • Check the OS name
    Bash command to run

    cat /etc/os-release

    • Check the CPU model
    Bash command to run

    cat /proc/cpuinfo

    • This is the hardware model... how many cores do you have available ? How much RAM ?
    Help

    htop will give you your current usage and available cores, or you can do nproc

    • Try and upload a file from your computer to the codespace by right clicking on the file explorer on the left

    • Create a new file and write a simple python \"Hello World\", then execute it from the terminal
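    For example (hello.py is just a suggested file name):

    echo 'print(\"Hello World\")' > hello.py\npython3 hello.py\n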

    "},{"location":"1_2_setup_codespace.html#a-demo-of-codespace-port-forwarding-web-preview","title":"A demo of codespace port forwarding / web preview","text":"
    • In your codespace, run jupyter lab to launch the jupyter lab installed in it
    • Check the \"port\" preview : It should have a new entry with the 8888 port. If not, create it manually
    • Click on open in browser
    • Copy the token from your terminal to the web browser
    • You are now in a jupyterlab hosted on your github codespace VM !

    Question

    Magic !? What do you think is happening ? Try to describe it with your own words

    • Cancel (CTRL+C) the jupyter process

    To learn more about port forwarding in codespaces, refer to the documentation

    "},{"location":"1_2_setup_codespace.html#2-running-your-notebooks-in-the-vm","title":"2. Running your notebooks in the VM","text":"

    As an exercise, you will set up your development environment in the codespace and run a notebook from your ML class inside the VM.

    • Transfer a notebook you are working on from your computer
    • Transfer the data as well if it's not downloaded
    • Set up your environment using pip, conda, etc... as you would do on your local machine
    • Run jupyter lab or jupyter notebook from your codespace and connect to it like previously
    • You can continue your script / etc...

    If you don't have anything at hand you can use this simple repo as an example (you will see that later on your DL classes) : https://github.com/pytorch/examples/tree/main/mnist
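    As an illustration, a minimal sequence of commands to fetch and run that example inside the codespace could look like this (exact paths and package versions may differ):

    git clone https://github.com/pytorch/examples.git\ncd examples/mnist\npip install -r requirements.txt\npython main.py --epochs 1\n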

    Question

    How comfortable do you feel with this remote machine ? Is it easy to get data in or out ? Code in or out ?

    "},{"location":"1_2_setup_codespace.html#3-lets-discover-streamlit","title":"3. Let's discover Streamlit","text":"

    We will now introduce streamlit, which is a very nice tool to build quick webapps in python !

    In this TP you will build your first interactive webapp in python and preview it using the codespace. This will help you get a feel for using the remote vscode.

    First, look at this video,


    Then, take a look at an introduction to streamlit and the streamlit application gallery

    Question

    Can you describe what exactly streamlit is ? Could you find any way it could be useful to you ?
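    As a first taste before the exercise below, a minimal streamlit script (purely illustrative, not part of the TP) only needs a few lines; saved as hello_st.py, it would be run with streamlit run hello_st.py:

    import streamlit as st\n\nst.title(\"Hello Streamlit\")\nname = st.text_input(\"Your name\", \"ISAE student\")\nst.write(f\"Hello, {name}!\")\n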

    "},{"location":"1_2_setup_codespace.html#31-your-first-streamlit-application","title":"3.1. Your first streamlit application","text":"

    Take a look at the code below,

    import streamlit as st\nfrom streamlit_image_comparison import image_comparison\nimport cv2\n\nst.set_page_config(\"Webb Space Telescope vs Hubble Telescope\", \"\ud83d\udd2d\")\n\nst.header(\"\ud83d\udd2d J. Webb Space Telescope vs Hubble Telescope\")\n\nst.write(\"\")\n\"This is a reproduction of the fantastic [WebbCompare](https://www.webbcompare.com/index.html) app by [John Christensen](https://twitter.com/JohnnyC1423). It's built in Streamlit and takes only 10 lines of Python code. If you like this app, please star [John's original repo](https://github.com/JohnEdChristensen/WebbCompare)!\"\nst.write(\"\")\n\nst.markdown(\"### Southern Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/southern_nebula_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/southern_nebula_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\n\nst.markdown(\"### Galaxy Cluster SMACS 0723\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/deep_field_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/deep_field_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Carina Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/carina_2800.png\",\n    img2=\"https://www.webbcompare.com/img/webb/carina_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Stephan's Quintet\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/stephans_quintet_2800.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/stephans_quintet_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n

    Question

    Can you describe, by reading the documentation, what the code does ?

    "},{"location":"1_2_setup_codespace.html#32-local-deployment-in-codespace","title":"3.2. Local deployment in codespace","text":"

    First, we will install the dependencies for our application in the codespace,

    pip install streamlit opencv-python-headless streamlit-image-comparison

    Then create a file streamlit_jswt.py and copy/paste the code above.

    Then execute it: streamlit run streamlit_jswt.py

    This will launch the application on port 8501 (by default) of our codespace. You can connect to it as usual.

    \ud83e\udd29 Nice, isn't it ?

    Now you can quit the server.

    "},{"location":"1_2_setup_codespace.html#33-a-more-complex-application","title":"3.3. A more complex application","text":"

    We will run and package a more complex application that is a lot more useful for your deep learning class.

    If you started your github codespace from the isae cloud computing codespace, you should have a folder called demo-streamlit-activation-function.

    Otherwise, clone the repository git clone https://github.com/fchouteau/isae-cloud-computing-codespace.git

    cd into the directory (cd isae-demo-streamlit-activation-functions), then, as last time, install the dependencies (pip install -r requirements.txt) and run the application (streamlit run app.py)

    You can visualize it as last time. This should be quite useful for you given you just left (or will just start, it's early in the year...) the Deep Learning Class !

    "},{"location":"1_3_gcp_handson.html","title":"Google Cloud Platform Hands-on","text":""},{"location":"1_3_gcp_handson.html#0-abstract","title":"0. Abstract","text":"

    Abstract

    In this hands-on you will configure your GCP account and the google cloud SDK, and access the cloud console using Google Cloud Shell. You will also discover a very useful tool, a managed jupyter notebook service from google named Google Colab, which may be very important for your future developments this year

    Warning

    Some things may only work on eduroam or in 4G...

    Warning

    Don't forget to shutdown everything when you're done since it costs you money. At the end, even if you have not finished the TP, go to section 8, \"Cleaning Up\"

    Tip

    When the TP says to replace \"{something}\" with a name, don't include the brackets, so write \"yourname\"

    Tip

    If you are not sure where you are, the terminal normally indicates the hostname; otherwise run the command hostname

    "},{"location":"1_3_gcp_handson.html#1-create-your-gcp-account","title":"1. Create your GCP Account","text":"

    Note

    You should have already done that last week

    Here you will each create a Google Cloud Platform account and project using the student credits given this year,

    Overview link

    • Create an account within Google cloud Platform using your ISAE e-mail
    • Use the code given by Dennis to redeem your free credits
    • You should have a free tier available to you as well as coupons
    • From the interface you should create a project with a name of your choice (it is recommended to put for example sdd2425-yourname so that it is clear)
    "},{"location":"1_3_gcp_handson.html#2-reconnect-to-github-codespaces","title":"2. (re)connect to GitHub Codespaces","text":""},{"location":"1_3_gcp_handson.html#if-you-still-have-your-codespace-from-last-time","title":"If you still have your codespace from last time","text":"

    If you go to the main page of https://github.com/codespaces and you see an existing codespace from last week, you can restart it using the (...) menu

    If you don't have one, recreate it (see below)

    "},{"location":"1_3_gcp_handson.html#create-your-codespace-and-connect-to-it","title":"Create your codespace and connect to it","text":"

    Go to https://github.com/fchouteau/isae-cloud-computing-codespace

    • Click on the top left corner for a new codespace
    • It should launch a VS Code instance in your browser
    • Launch a terminal using the top right menu

    If that does not work, go to https://github.com/github/codespaces-blank and create a codespace from there

    You should arrive at a VS Code instance

    If you go to the main page of https://github.com/codespaces you should see your codespace running

    "},{"location":"1_3_gcp_handson.html#3-install-google-cloud-sdk-configure-the-shell","title":"3. Install Google Cloud SDK & Configure the shell","text":"

    If you want to interact with GCP from your computer or codespaces, you will need to install the Google Cloud SDK, which will also install a shell if you are on windows

    Warning

    If you have a codespace cloned from mine, the google cloud sdk is already installed. Try gcloud to check that, and skip this step if it returns something

    Note

    You can install the cloud SDK locally, but I recommend using your codespace

    Installing locally

    The best way to interact with the google cloud SDK is from a terminal, so follow the instructions for your platform:

    • Ubuntu / Debian https://cloud.google.com/sdk/docs/install#deb
    • Other Linux (either VM or native): https://cloud.google.com/sdk/docs/install#linux
    • MacOS: https://cloud.google.com/sdk/docs/install#mac
    • Windows Subsystem for Linux: see Linux
    • Windows: https://cloud.google.com/sdk/docs/install#windows
    Installing on codespace

    If you are on codespace, run the commands below to install the gcloud tool to your machine

    Note : If you used the custom codespace, it should already be installed, try gcloud init directly

    echo \"deb https://packages.cloud.google.com/apt cloud-sdk main\" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list\ncurl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -\nsudo apt-get update && sudo apt-get install google-cloud-cli\n

    Then run gcloud init in your terminal to configure the google cloud sdk with your account

    You should at some point see a link in the terminal. Click on the link and log in with your google account, then copy the token back to your codespace.

    Your github codespace is now configured with your google cloud platform credentials
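    To double-check the configuration, you can for example run the commands below (YOUR_PROJECT_ID is a placeholder for the project you created earlier):

    gcloud auth list\ngcloud config list\ngcloud config set project YOUR_PROJECT_ID\n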

    "},{"location":"1_3_gcp_handson.html#4-my-first-google-compute-engine-instance","title":"4. My first Google Compute Engine Instance","text":"

    First, we will take our first steps by creating a compute engine instance (a VM) using the console, connecting to it via SSH, interacting with it and uploading some files; then we will shut it down and make the magic happen by resizing it

    • What is google cloud compute engine ? try to describe it with your own words
    "},{"location":"1_3_gcp_handson.html#4a-creating-my-vm-using-the-console-the-gui","title":"4a. Creating my VM using the console (the GUI)","text":"
    • Create your VM from the google cloud interface : Go to this link and follow the \"CONSOLE\" instruction

    • Create an instance with the following parameters

      • type: n1-standard-1
      • zone: europe-west1-b (Belgium)
      • os: ubuntu 22.04 x86
      • boot disk size: 10 Gb
      • boot disk type: pd-standard
    • Give it a name of your choice (that you can remember)
    • DO NOT SHUT IT DOWN for now
    Note

    If you were using the command line, you would have done this

    gcloud compute instances create {name} --project={your-project} --zone={your-zone} \\\n  --machine-type=n1-standard-1 \\\n  --create-disk=auto-delete=yes,boot=yes,device-name=dev-instance-{index},image=projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20231030,mode=rw,size=10,type=projects/sdd2324/zones/{your-zone}/diskTypes/pd-standard\n
    "},{"location":"1_3_gcp_handson.html#4b-connecting-to-ssh","title":"4b. Connecting to SSH","text":"
    • Connect to ssh from the github codespace

      Solution

      gcloud compute ssh ${MACHINE_NAME}

      Note

      We are using gcloud compute ssh instead of plain ssh. This is an automated tool that takes care of locating your machine in GCP and transferring the keys

    • Check available disk space

      Solution

      df -h

    • Check the OS name

      Solution

      cat /etc/os-release

    • Check the CPU model

      Solution

      cat /proc/cpuinfo

    • Check the number of cores available and the RAM

      Solution

      htop

    "},{"location":"1_3_gcp_handson.html#4c-the-magic-of-redimensioning-vms","title":"4c. The magic of redimensioning VMs","text":"
    • Shutdown the VM (from the web browser), check the previous codelab to see how to do it
    • Select it and click on EDIT
    • Change the machine type to n1-standard-2 (link to documentation)
    • Relaunch it, reconnect to it and try to check using htop the number of cores & RAM available
    • Note : If you run cat /proc/cpuinfo again you will see that you are running on the same hardware !

    Magic isn't it ?

    Note: If you had any files and specific configuration, they would still be here !

    "},{"location":"1_3_gcp_handson.html#4d-transfering-files-from-the-computer-or-codespaces-to-this-machine","title":"4d. Transfering files from the computer (or codespaces) to this machine","text":"
    • We will use the terminal to transfer some files from your computer (or codespaces) to this machine,
    • If you use cloud shell you can do it as well : create a dummy file in cloud shell

    • Follow this link to learn how to use the gcloud cli tool to transfer files to your instance (an example is sketched after this list)

    • For experts, it's possible to do it manually using rsync from ssh or scp

    • Transfer some files to your /home/${USER} directory

    • List them from your instance (ls)
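    For example, a minimal transfer from the codespace to the instance could look like this (dummy.txt and ${MACHINE_NAME} are placeholders):

    echo \"hello\" > dummy.txt\ngcloud compute scp dummy.txt ${USER}@${MACHINE_NAME}:/home/${USER}/ --zone=europe-west1-b\n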

    How do we do the opposite ?

    See section 5.

    "},{"location":"1_3_gcp_handson.html#4e-persistent-ssh-sessions-with-tmux","title":"4e. Persistent SSH sessions with TMUX","text":"
    • Connect to your GCE instance using SSH from the codespace
    • Question: What happens if you start a long computation and disconnect ?
    • Check that tmux is installed on the remote instance (run tmux); if not, install it
    • Follow this tutorial: https://www.hamvocke.com/blog/a-quick-and-easy-guide-to-tmux/
    • To check you have understood, you should be able to (see the command sketch after this list):
      • Connect to your remote instance with ssh
      • Start a tmux session
      • Launch a process (for example htop) inside it
      • Detach from the session (CTRL+B then type :detach)
      • Kill the ssh connection
      • Connect again
      • tmux attach to your session
      • Your process should still be here !
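    A minimal command sketch (the session name train is just an example):

    tmux new -s train          # start a named session\n# launch a long-running process (e.g. htop) inside it, then press CTRL+B then D to detach\ntmux ls                    # after reconnecting over ssh, list existing sessions\ntmux attach -t train       # re-attach to the session\n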

    Congratulations :)

    "},{"location":"1_3_gcp_handson.html#5-interacting-with-google-cloud-storage","title":"5. Interacting with Google Cloud Storage","text":"

    Here we will discover google cloud storage, upload some files from your computer and download them from your instance in the cloud

    • What is Google Cloud Storage ? Try to describe it with your own words

    • Use this tutorial to upload something from your computer to google cloud storage from the web browser (DO NOT DELETE THE FILES YET)

    Now we will download it using the google cloud CLI tool. Here's the documentation

    Follow the tutorial to learn how to do what you just did, but this time using gsutil from your codespace (example commands are sketched after the list below)

    • List the content of the bucket you just created (if you deleted it previously, create a new one)
    • Upload a file to a bucket
    • Download a file from a bucket
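    For example, with a hypothetical bucket name (replace yourname-bucket with your own globally-unique name, and dummy.txt with any local file):

    gsutil mb -l europe-west1 gs://yourname-bucket/\ngsutil ls gs://yourname-bucket/\ngsutil cp dummy.txt gs://yourname-bucket/\ngsutil cp gs://yourname-bucket/dummy.txt .\n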

    Optional : What if we want to do the same from the GCE instance ?

    • Now go back to your machine

    • Try to list bucket, download and upload files

    • Is it possible ?

    • If not, it's because you have to allow the instance to access google cloud storage

    • Shutdown the VM and edit it (like we did when we resized the instance)

    • Check \"access scopes\", select \"set access for each api\", and select \"storage / admin\"

    • Now restart your machine and connect back to it. You should now be able to upload files to google cloud storage

    • You can delete the VM as well, we will not use it

    "},{"location":"1_3_gcp_handson.html#6-deep-learning-vm-ssh-and-port-forwarding","title":"6. Deep Learning VM, SSH and Port Forwarding","text":""},{"location":"1_3_gcp_handson.html#6a-deep-learning-vm","title":"6a. deep learning vm","text":"

    Here we will use the google cloud sdk to create a more complex VM with a pre-installed image and connect to its jupyter server

    Google Cloud Platform comes with a set of services targeted at data scientists called AI Platform, among them are Deep Learning VMs which are essentially preinstalled VMs (more or less the same configuration as google colab) with some bonuses.

    • What are \"Deep Learning VMs\" ? Try to use your own words
    • What would be the alternative if you wanted to get a machine with the same installation ?
    "},{"location":"1_3_gcp_handson.html#6b-create-a-google-compute-engine-instance-using-the-command-line","title":"6b. create a google compute engine instance using the command line","text":"

    Instead of using the browser to create this machine, we will be using the CLI to create instances

    export INSTANCE_NAME=\"fch-dlvm-1\" # <--- RENAME THIS !!!!!!!!!!\n\ngcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=\"TERMINATE\" \\\n        --scopes=\"storage-rw\" \\\n        --machine-type=\"n1-standard-1\" \\\n        --boot-disk-size=\"50GB\" \\\n        --boot-disk-type=\"pd-standard\"\n
    • Notice the similarities between the first VM you created and this one,
    • What changed ?
    • If you want to learn more about compute images, image families etc... go here
    "},{"location":"1_3_gcp_handson.html#6c-connect-with-ssh-to-this-machine-with-port-forwarding","title":"6c. connect with ssh to this machine with port forwarding","text":"
    • Connect to your instance using the gcloud cli & ssh from the codespace with port forwarding

    • Forward the port 8888 when you're connecting to the instance

    • Documentation forward some ports as well

    Solution

    gcloud compute ssh user@machine-name --zone=europe-west1-b -- -L 8888:localhost:8888

    If you are in codespace, use the port forwarding utility, add a new port (8888). It may be done automatically.

    • Explore the machine the same way we did previously

    • You can see you have a conda environment installed. Try to query the list of installed packages

    Solution

    conda list\npip list

    • is (py)torch installed ? If not, install it
    Solution

    pip list | grep torch\npip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

    "},{"location":"1_3_gcp_handson.html#6d-run-jupyter-lab-on-the-gce-vm","title":"6d. Run jupyter lab on the GCE VM","text":"
    • In the GCE VM, run jupyter lab

    • Copy the credentials

    • Connect to the port 8888 of the GitHub CodeSpace. You should be redirected to a jupyter instance

    Question

    Where are we ? Where is the jupyter lab hosted ? What is the difference between this and the jupyter lab we launched from codespace last week ?

    Don't disconnect from the VM, we will continue below

    "},{"location":"1_3_gcp_handson.html#7-end-to-end-example","title":"7. End to end example","text":"

    We will replicate the following setup (simplified)

    • Your development machine (the github codespace) has some training code
    • You have a \"high performance\" machine in the cloud
    • You want to transfer the training code to the VM
    • You want to run the training in a remote machine
    • Once the training is done you want to upload the model weights to google cloud storage

    • In your codespace, in a new folder (eg. training), copy the content of this

    Solution

    gcloud compute scp --recurse training ${USER}@{MACHINE}:/home/${USER}/

    You should find it on your GCE VM

    • Run it using python train.py --epochs 1 --save-model.

    It should train a neural network on the MNIST dataset. BONUS : Run it inside a tmux session ;)

    • Once it has finished, you should see a new file, the model weights mnist_cnn.pt

    • From the GCE VM : Upload the weights to the google cloud storage bucket you previously created

    Solution

    gcloud storage cp mnist_cnn.pt gs://(...)

    • From the GitHubCodespace : Download the model weights from google cloud storage
    Solution

    gcloud storage cp gs://(...) mnist_cnn.pt

    Success

    yay ! Don't forget to cleanup

    "},{"location":"1_3_gcp_handson.html#8-important-cleaning-up","title":"8. IMPORTANT : Cleaning up","text":"

    Warning

    • DELETE ALL THE BUCKETS YOU CREATED
    • DELETE ALL THE GCP INSTANCES YOU CREATED
    • SHUTDOWN YOUR CODESPACE

    How to shutdown codespaces :

    • Click on stop codespace to shut it down (you \"pay\" for the disk with your free credits)
    • Click on kill codespace to delete it
    "},{"location":"1_3_gcp_handson.html#9-optional-introduction-to-infrastructure-as-code","title":"9. Optional - Introduction to infrastructure as code","text":"
    • This tutorial will guide you through google cloud deployment manager, which is a way to deploy google compute engine instances using configuration files

    • Don't forget to adapt machine configurations and zone to your use case (see above)

    If you run this, don't forget to clean everything up afterwards

    "},{"location":"1_3_gcp_handson.html#10-optional-managed-database","title":"10. Optional - Managed Database","text":"
    • I think you've just done a class on SQL databases

    • Here are the managed SQL services of google cloud

    Question

    Can you describe what it is ? What do you pay to google ? How much does it cost ? What is a managed service in cloud vocabulary ?

    • If you still have some code to interact with a database, you can try launching one here and redoing your classes
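    If you want to try, a minimal sketch (instance name and password are placeholders, and flags may need adjusting) to create, connect to and delete a managed PostgreSQL instance from the SDK could look like this; remember that a running instance is billed, so delete it when you are done:

    gcloud sql instances create my-test-postgres --database-version=POSTGRES_15 --tier=db-f1-micro --region=europe-west1\ngcloud sql users set-password postgres --instance=my-test-postgres --password=ChangeMe123\ngcloud sql connect my-test-postgres --user=postgres\ngcloud sql instances delete my-test-postgres\n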
    "},{"location":"1_3_gcp_lecture.html","title":"Google Cloud Platform","text":"

    Link to slides

    "},{"location":"1_4_containers.html","title":"From Virtualisation to Containerisation","text":"

    Link to slides

    "},{"location":"1_4_docker_tp.html","title":"Docker: Hands on","text":"

    Note

    If you are lost, docker system prune will remove dangling images and stopped containers

    "},{"location":"1_4_docker_tp.html#0-how-to-run-this","title":"0. How to run this ?","text":"

    Abstract

    We will discover the basics of docker and you will be able to manipulate your first images and containers !

    You should be inside the Github Codespace you created and have google cloud SDK installed in it

    If not, refer to the previous tutorial and do steps 2 and 3

    This codespace has everything you need, including docker

    If you want to do everything from your linux machine you can install docker but I don't recommend it for now

    "},{"location":"1_4_docker_tp.html#1-manipulating-docker-for-the-1st-time","title":"1. Manipulating docker for the 1st time","text":"

    Source: https://github.com/docker/labs

    To get started, let's run the following in our terminal:

    $ docker pull alpine\n

    The pull command fetches the alpine image from the Docker registry and saves it in our system. You can use the docker images command to see a list of all images on your system.

    $ docker images\nREPOSITORY              TAG                 IMAGE ID            CREATED             VIRTUAL SIZE\nalpine                 latest              c51f86c28340        4 weeks ago         1.109 MB\nhello-world             latest              690ed74de00f        5 months ago        960 B\n

    "},{"location":"1_4_docker_tp.html#11-docker-run","title":"1.1 Docker Run","text":"

    Great! Let's now run a Docker container based on this image. To do that you are going to use the docker run command.

    $ docker run alpine ls -l\ntotal 48\ndrwxr-xr-x    2 root     root          4096 Mar  2 16:20 bin\ndrwxr-xr-x    5 root     root           360 Mar 18 09:47 dev\ndrwxr-xr-x   13 root     root          4096 Mar 18 09:47 etc\ndrwxr-xr-x    2 root     root          4096 Mar  2 16:20 home\ndrwxr-xr-x    5 root     root          4096 Mar  2 16:20 lib\n......\n......\n

    What happened? Behind the scenes, a lot of stuff happened. When you call run,

    1. The Docker client contacts the Docker daemon
    2. The Docker daemon checks the local store to see if the image (alpine in this case) is available locally, and if not, downloads it from Docker Store. (Since we have issued docker pull alpine before, the download step is not necessary)
    3. The Docker daemon creates the container and then runs a command in that container.
    4. The Docker daemon streams the output of the command to the Docker client

    When you run docker run alpine, you provided a command (ls -l), so Docker started the command specified and you saw the listing.

    Let's try something more exciting.

    $ docker run alpine echo \"hello from alpine\"\nhello from alpine\n
    OK, that's some actual output. In this case, the Docker client dutifully ran the echo command in our alpine container and then exited it. If you've noticed, all of that happened pretty quickly. Imagine booting up a virtual machine, running a command and then killing it. Now you know why they say containers are fast!

    Try another command.

    docker run alpine /bin/sh\n

    Wait, nothing happened! Is that a bug? Well, no. These interactive shells will exit after running any scripted commands, unless they are run in an interactive terminal - so for this example to not exit, you need to docker run -it alpine /bin/sh.

    You are now inside the container shell and you can try out a few commands like ls -l, uname -a and others. Exit out of the container by giving the exit command.

    Ok, now it's time to see the docker ps command. The docker ps command shows you all containers that are currently running.

    $ docker ps\nCONTAINER ID        IMAGE               COMMAND             CREATED             STATUS              PORTS               NAMES\n

    Since no containers are running, you see a blank line. Let's try a more useful variant: docker ps -a

    $ docker ps -a\nCONTAINER ID        IMAGE               COMMAND                  CREATED             STATUS                      PORTS               NAMES\n36171a5da744        alpine              \"/bin/sh\"                5 minutes ago       Exited (0) 2 minutes ago                        fervent_newton\na6a9d46d0b2f        alpine             \"echo 'hello from alp\"    6 minutes ago       Exited (0) 6 minutes ago                        lonely_kilby\nff0a5c3750b9        alpine             \"ls -l\"                   8 minutes ago       Exited (0) 8 minutes ago                        elated_ramanujan\nc317d0a9e3d2        hello-world         \"/hello\"                 34 seconds ago      Exited (0) 12 minutes ago                       stupefied_mcclintock\n

    What you see above is a list of all containers that you ran. Notice that the STATUS column shows that these containers exited a few minutes ago. You're probably wondering if there is a way to run more than just one command in a container. Let's try that now:

    $ docker run -it alpine /bin/sh\n/ # ls\nbin      dev      etc      home     lib      linuxrc  media    mnt      proc     root     run      sbin     sys      tmp      usr      var\n/ # uname -a\nLinux 97916e8cb5dc 4.4.27-moby #1 SMP Wed Oct 26 14:01:48 UTC 2016 x86_64 Linux\n
    Running the run command with the -it flags attaches us to an interactive tty in the container. Now you can run as many commands in the container as you want. Take some time to run your favorite commands.

    That concludes a whirlwind tour of the docker run command which would most likely be the command you'll use most often. It makes sense to spend some time getting comfortable with it. To find out more about run, use docker run --help to see a list of all flags it supports. As you proceed further, we'll see a few more variants of docker run.

    "},{"location":"1_4_docker_tp.html#12-terminology","title":"1.2 Terminology","text":"

    In the last section, you saw a lot of Docker-specific jargon which might be confusing to some. So before you go further, let's clarify some terminology that is used frequently in the Docker ecosystem.

    • Images - The file system and configuration of our application which are used to create containers. To find out more about a Docker image, run docker inspect alpine. In the demo above, you used the docker pull command to download the alpine image. When you executed the command docker run hello-world, it also did a docker pull behind the scenes to download the hello-world image.
    • Containers - Running instances of Docker images \u2014 containers run the actual applications. A container includes an application and all of its dependencies. It shares the kernel with other containers, and runs as an isolated process in user space on the host OS. You created a container using docker run which you did using the alpine image that you downloaded. A list of running containers can be seen using the docker ps command.
    • Docker daemon - The background service running on the host that manages building, running and distributing Docker containers.
    • Docker client - The command line tool that allows the user to interact with the Docker daemon.
    • Docker Store - A registry of Docker images, where you can find trusted and enterprise ready containers, plugins, and Docker editions. You'll be using this later in this tutorial.
    "},{"location":"1_4_docker_tp.html#20-webapps-with-docker","title":"2.0 Webapps with Docker","text":"

    Source: https://github.com/docker/labs

    Great! So you have now looked at docker run, played with a Docker container and also got the hang of some terminology. Armed with all this knowledge, you are now ready to get to the real stuff \u2014 deploying web applications with Docker.

    "},{"location":"1_4_docker_tp.html#21-run-a-static-website-in-a-container","title":"2.1 Run a static website in a container","text":"

    Note: Code for this section is in this repo in the website directory

    Let's start by taking baby-steps. First, we'll use Docker to run a static website in a container. The website is based on an existing image. We'll pull a Docker image from Docker Store, run the container, and see how easy it is to set up a web server.

    The image that you are going to use is a single-page website that was already created for this demo and is available on the Docker Store as dockersamples/static-site. You can download and run the image directly in one go using docker run as follows.

    docker run -d dockersamples/static-site\n

    Files:

    • Dockerfile
    • hello_docker.html

    Note: The current version of this image doesn't run without the -d flag. The -d flag enables detached mode, which detaches the running container from the terminal/shell and returns your prompt after the container starts. We are debugging the problem with this image but for now, use -d even for this first example.

    So, what happens when you run this command?

    Since the image doesn't exist on your Docker host, the Docker daemon first fetches it from the registry and then runs it as a container.

Now that the server is running, do you see the website? What port is it running on? And more importantly, how do you access the container directly from your host machine?

    Actually, you probably won't be able to answer any of these questions yet! \u263a In this case, the client didn't tell the Docker Engine to publish any of the ports, so you need to re-run the docker run command to add this instruction.

    Let's re-run the command with some new flags to publish ports and pass your name to the container to customize the message displayed. We'll use the -d option again to run the container in detached mode.

    First, stop the container that you have just launched. In order to do this, we need the container ID.

    Since we ran the container in detached mode, we don't have to launch another terminal to do this. Run docker ps to view the running containers.

    $ docker ps\nCONTAINER ID        IMAGE                  COMMAND                  CREATED             STATUS              PORTS               NAMES\na7a0e504ca3e        dockersamples/static-site   \"/bin/sh -c 'cd /usr/\"   28 seconds ago      Up 26 seconds       80/tcp, 443/tcp     stupefied_mahavira\n

    Check out the CONTAINER ID column. You will need to use this CONTAINER ID value, a long sequence of characters, to identify the container you want to stop, and then to remove it. The example below provides the CONTAINER ID on our system; you should use the value that you see in your terminal.

    $ docker stop a7a0e504ca3e\n$ docker rm   a7a0e504ca3e\n

    Note: A cool feature is that you do not need to specify the entire CONTAINER ID. You can just specify a few starting characters and if it is unique among all the containers that you have launched, the Docker client will intelligently pick it up.
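For example, with the container from the listing above, the following sketch would work just as well, since the prefix a7a0 is unambiguous here:

$ docker stop a7a0\n$ docker rm   a7a0\n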

    Now, let's launch a container in detached mode as shown below:

    $ docker run --name static-site -e AUTHOR=\"Your Name\" -d -P dockersamples/static-site\ne61d12292d69556eabe2a44c16cbd54486b2527e2ce4f95438e504afb7b02810\n

    In the above command:

    • -d will create a container with the process detached from our terminal
    • -P will publish all the exposed container ports to random ports on the Docker host
    • -e is how you pass environment variables to the container
    • --name allows you to specify a container name
    • AUTHOR is the environment variable name and Your Name is the value that you can pass

    Now you can see the ports by running the docker port command.

    $ docker port static-site\n443/tcp -> 0.0.0.0:32772\n80/tcp -> 0.0.0.0:32773\n

If you are on a codespace, create a port forwarding for the host port mapped to the container's port 80 (32773 in the example above) to connect to the website

    If you are running Docker for Mac, Docker for Windows, or Docker on Linux, you can open http://localhost:[YOUR_PORT_FOR 80/tcp]. For our example this is http://localhost:32773.
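As a quick check from the terminal, you can also query the published port with curl (the port below is the one from the example output; use the one docker port reported on your machine):

curl -s http://localhost:32773 | head\n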

    If you are using Docker Machine on Mac or Windows, you can find the hostname on the command line using docker-machine as follows (assuming you are using the default machine).

    $ docker-machine ip default\n192.168.99.100\n
    You can now open http://<YOUR_IPADDRESS>:[YOUR_PORT_FOR 80/tcp] to see your site live! For our example, this is: http://192.168.99.100:32773.

    You can also run a second webserver at the same time, specifying a custom host port mapping to the container's webserver.

    $ docker run --name static-site-2 -e AUTHOR=\"Your Name\" -d -p 8888:80 dockersamples/static-site\n

To deploy this on a real server you would just need to install Docker and run the above docker command (in this case the site displays the AUTHOR value that we passed as an environment variable).

    Now that you've seen how to run a webserver inside a Docker container, how do you create your own Docker image? This is the question we'll explore in the next section.

    But first, let's stop and remove the containers since you won't be using them anymore.

    $ docker stop static-site\n$ docker rm static-site\n

    Let's use a shortcut to remove the second site:

    $ docker rm -f static-site-2\n

    Run docker ps to make sure the containers are gone.

    $ docker ps\nCONTAINER ID        IMAGE               COMMAND             CREATED             STATUS              PORTS               NAMES\n
    "},{"location":"1_4_docker_tp.html#22-docker-images","title":"2.2 Docker Images","text":"

In this section, let's dive deeper into what Docker images are. You will build your own image and use it to run an application locally.

    Docker images are the basis of containers. In the previous example, you pulled the dockersamples/static-site image from the registry and asked the Docker client to run a container based on that image. To see the list of images that are available locally on your system, run the docker images command.

    $ docker images\nREPOSITORY             TAG                 IMAGE ID            CREATED             SIZE\ndockersamples/static-site   latest              92a386b6e686        2 hours ago        190.5 MB\nnginx                  latest              af4b3d7d5401        3 hours ago        190.5 MB\npython                 2.7                 1c32174fd534        14 hours ago        676.8 MB\npostgres               9.4                 88d845ac7a88        14 hours ago        263.6 MB\ncontainous/traefik     latest              27b4e0c6b2fd        4 days ago          20.75 MB\nnode                   0.10                42426a5cba5f        6 days ago          633.7 MB\nredis                  latest              4f5f397d4b7c        7 days ago          177.5 MB\nmongo                  latest              467eb21035a8        7 days ago          309.7 MB\nalpine                 3.3                 70c557e50ed6        8 days ago          4.794 MB\njava                   7                   21f6ce84e43c        8 days ago          587.7 MB\n

    Above is a list of images that I've pulled from the registry and those I've created myself (we'll shortly see how). You will have a different list of images on your machine. The TAG refers to a particular snapshot of the image and the ID is the corresponding unique identifier for that image.

    For simplicity, you can think of an image akin to a git repository - images can be committed with changes and have multiple versions. When you do not provide a specific version number, the client defaults to latest.

    For example you could pull a specific version of ubuntu image as follows:

    $ docker pull ubuntu:12.04\n

    If you do not specify the version number of the image then, as mentioned, the Docker client will default to a version named latest.

    So for example, the docker pull command given below will pull an image named ubuntu:latest:

    $ docker pull ubuntu\n

    To get a new Docker image you can either get it from a registry (such as the Docker Store) or create your own. There are hundreds of thousands of images available on Docker Store. You can also search for images directly from the command line using docker search.
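For example, a minimal sketch of searching from the command line and then pulling one of the results:

docker search alpine\ndocker pull alpine:3.18\n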

    An important distinction with regard to images is between base images and child images.

    • Base images are images that have no parent images, usually images with an OS like ubuntu, alpine or debian.

    • Child images are images that build on base images and add additional functionality.

    Another key concept is the idea of official images and user images. (Both of which can be base images or child images.)

    • Official images are Docker sanctioned images. Docker, Inc. sponsors a dedicated team that is responsible for reviewing and publishing all Official Repositories content. This team works in collaboration with upstream software maintainers, security experts, and the broader Docker community. These are not prefixed by an organization or user name. In the list of images above, the python, node, alpine and nginx images are official (base) images. To find out more about them, check out the Official Images Documentation.

    • User images are images created and shared by users like you. They build on base images and add additional functionality. Typically these are formatted as user/image-name. The user value in the image name is your Docker Store user or organization name.

    "},{"location":"1_4_docker_tp.html#23-create-your-first-image","title":"2.3 Create your first image","text":"

    Note: The code for this section is in this repository in the flask-app directory.

Now that you have a better understanding of images, it's time to create your own. The goal of this exercise is to create a Docker image that sandboxes and runs a small Flask application.

    We'll do this by first pulling together the components for a random cat picture generator built with Python Flask, then dockerizing it by writing a Dockerfile. Finally, we'll build the image, and then run it.

    • Create a Python Flask app that displays random cat pix
    • Write a Dockerfile
    • Build the image
    • Run your image
    • Dockerfile commands summary
    "},{"location":"1_4_docker_tp.html#231-create-a-python-flask-app-that-displays-random-cat-pix","title":"2.3.1 Create a Python Flask app that displays random cat pix","text":"

    For the purposes of this workshop, we've created a fun little Python Flask app that displays a random cat .gif every time it is loaded - because, you know, who doesn't like cats?

    Start by creating a directory called flask-app where we'll create the following files:

    • app.py
    • requirements.txt
    • templates/index.html
    • Dockerfile

    Make sure to cd flask-app before you start creating the files, because you don't want to start adding a whole bunch of other random files to your image.

    "},{"location":"1_4_docker_tp.html#apppy","title":"app.py","text":"

    Create the app.py with the following content:

    from flask import Flask, render_template\nimport random\n\napp = Flask(__name__)\n\n# list of cat images\nimages = [\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif1.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif2.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif3.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif4.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif5.gif\",\n   \"https://storage.googleapis.com/fchouteau-isae-cloud/gifs/gif6.gif\",\n    ]\n\n@app.route('/')\ndef index():\n    url = random.choice(images)\n    return render_template('index.html', url=url)\n\nif __name__ == \"__main__\":\n    app.run(host=\"0.0.0.0\")\n
    "},{"location":"1_4_docker_tp.html#requirementstxt","title":"requirements.txt","text":"

    In order to install the Python modules required for our app, we need to create a file called requirements.txt and add the following line to that file:

    flask\ntyper\n
    "},{"location":"1_4_docker_tp.html#templatesindexhtml","title":"templates/index.html","text":"

    Create a directory called templates and create an index.html file in that directory with the following content in it:

    <html>\n  <head>\n    <style type=\"text/css\">\n      body {\n        background: black;\n        color: white;\n      }\n      div.container {\n        max-width: 500px;\n        margin: 100px auto;\n        border: 20px solid white;\n        padding: 10px;\n        text-align: center;\n      }\n      h4 {\n        text-transform: uppercase;\n      }\n    </style>\n  </head>\n  <body>\n    <div class=\"container\">\n      <h4>Cat Gif of the day</h4>\n      <img src=\"{{url}}\" />\n    </div>\n  </body>\n</html>\n
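Before containerizing anything, you can optionally sanity-check the app directly on your machine. This is just a sketch and assumes a local Python 3 with pip; the Docker build in the next section does not depend on it:

pip install -r requirements.txt\npython app.py\n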
    "},{"location":"1_4_docker_tp.html#232-write-a-dockerfile","title":"2.3.2 Write a Dockerfile","text":"

    We want to create a Docker image with this web app. As mentioned above, all user images are based on a base image. Since our application is written in Python, we will build our own Python image based on Alpine. We'll do that using a Dockerfile.

    A Dockerfile is a text file that contains a list of commands that the Docker daemon calls while creating an image. The Dockerfile contains all the information that Docker needs to know to run the app \u2014 a base Docker image to run from, location of your project code, any dependencies it has, and what commands to run at start-up. It is a simple way to automate the image creation process. The best part is that the commands you write in a Dockerfile are almost identical to their equivalent Linux commands. This means you don't really have to learn new syntax to create your own Dockerfiles.

    1. Create a file called Dockerfile, and add content to it as described below.

    We'll start by specifying our base image, using the FROM keyword:

    FROM alpine:3.18\n

Note: If you use the latest version of alpine, which is 3.20, follow this tutorial to handle an error you might get

2. The next step is usually to copy the files and install the dependencies. But first we will install the Python pip package on the alpine linux distribution. This will not just install the pip package but its dependencies too, which include the python interpreter. Add the following RUN command next. Additionally, we will do something to handle the newest Python packaging rules
    RUN apk add --update py-pip\n
3. Let's add the files that make up the Flask Application.

    Install all Python requirements for our app to run. This will be accomplished by adding the lines:

    COPY requirements.txt /usr/src/app/\nRUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n

    Copy the files you have created earlier into our image by using COPY command.

    COPY app.py /usr/src/app/\nCOPY templates/index.html /usr/src/app/templates/\n
4. Specify the port number that needs to be exposed. Since our flask app runs on port 5000, that's what we'll expose.
    EXPOSE 5000\n
5. The last step is the command for running the application, which is simply python ./app.py. Use the CMD instruction to do that:
    CMD [\"python\", \"/usr/src/app/app.py\"]\n

    The primary purpose of CMD is to tell the container which command it should run by default when it is started.

6. Verify your Dockerfile.

    Our Dockerfile is now ready. This is how it looks:

    # our base image\nFROM alpine:3.18\n\n# Install python and pip\nRUN apk add --update py-pip\n\n# install Python modules needed by the Python app\nCOPY requirements.txt /usr/src/app/\nRUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n\n# copy files required for the app to run\nCOPY app.py /usr/src/app/\nCOPY templates/index.html /usr/src/app/templates/\n\n# tell the port number the container should expose\nEXPOSE 5000\n\n# run the application\nCMD [\"python\", \"/usr/src/app/app.py\"]\n
    "},{"location":"1_4_docker_tp.html#233-build-the-image","title":"2.3.3 Build the image","text":"

    Now that you have your Dockerfile, you can build your image. The docker build command does the heavy-lifting of creating a docker image from a Dockerfile.

    The docker build command is quite simple - it takes an optional tag name with the -t flag, and the location of the directory containing the Dockerfile - the . indicates the current directory:

    docker build -t myfirstapp:1.0 .

    $ docker build -t myfirstapp:1.0 .\nSending build context to Docker daemon 9.728 kB\nStep 1 : FROM alpine:18\n ---> 0d81fc72e790\nStep 2 : RUN apk add --update py-pip\n ---> Running in 8abd4091b5f5\nfetch http://dl-4.alpinelinux.org/alpine/v3.3/main/x86_64/APKINDEX.tar.gz\nfetch http://dl-4.alpinelinux.org/alpine/v3.3/community/x86_64/APKINDEX.tar.gz\n(1/12) Installing libbz2 (1.0.6-r4)\n(2/12) Installing expat (2.1.0-r2)\n(3/12) Installing libffi (3.2.1-r2)\n(4/12) Installing gdbm (1.11-r1)\n(5/12) Installing ncurses-terminfo-base (6.0-r6)\n(6/12) Installing ncurses-terminfo (6.0-r6)\n(7/12) Installing ncurses-libs (6.0-r6)\n(8/12) Installing readline (6.3.008-r4)\n(9/12) Installing sqlite-libs (3.9.2-r0)\n(10/12) Installing python (2.7.11-r3)\n(11/12) Installing py-setuptools (18.8-r0)\n(12/12) Installing py-pip (7.1.2-r0)\nExecuting busybox-1.24.1-r7.trigger\nOK: 59 MiB in 23 packages\n ---> 976a232ac4ad\nRemoving intermediate container 8abd4091b5f5\nStep 3 : COPY requirements.txt /usr/src/app/\n ---> 65b4be05340c\nRemoving intermediate container 29ef53b58e0f\nStep 4 : RUN pip install --no-cache-dir -r /usr/src/app/requirements.txt\n ---> Running in a1f26ded28e7\nCollecting Flask==0.10.1 (from -r /usr/src/app/requirements.txt (line 1))\n  Downloading Flask-0.10.1.tar.gz (544kB)\nCollecting Werkzeug>=0.7 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading Werkzeug-0.11.4-py2.py3-none-any.whl (305kB)\nCollecting Jinja2>=2.4 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading Jinja2-2.8-py2.py3-none-any.whl (263kB)\nCollecting itsdangerous>=0.21 (from Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading itsdangerous-0.24.tar.gz (46kB)\nCollecting MarkupSafe (from Jinja2>=2.4->Flask==0.10.1->-r /usr/src/app/requirements.txt (line 1))\n  Downloading MarkupSafe-0.23.tar.gz\nInstalling collected packages: Werkzeug, MarkupSafe, Jinja2, itsdangerous, Flask\n  Running setup.py install for MarkupSafe\n  Running setup.py install for itsdangerous\n  Running setup.py install for Flask\nSuccessfully installed Flask-0.10.1 Jinja2-2.8 MarkupSafe-0.23 Werkzeug-0.11.4 itsdangerous-0.24\nYou are using pip version 7.1.2, however version 8.1.1 is available.\nYou should consider upgrading via the 'pip install --upgrade pip' command.\n ---> 8de73b0730c2\nRemoving intermediate container a1f26ded28e7\nStep 5 : COPY app.py /usr/src/app/\n ---> 6a3436fca83e\nRemoving intermediate container d51b81a8b698\nStep 6 : COPY templates/index.html /usr/src/app/templates/\n ---> 8098386bee99\nRemoving intermediate container b783d7646f83\nStep 7 : EXPOSE 5000\n ---> Running in 31401b7dea40\n ---> 5e9988d87da7\nRemoving intermediate container 31401b7dea40\nStep 8 : CMD python /usr/src/app/app.py\n ---> Running in 78e324d26576\n ---> 2f7357a0805d\nRemoving intermediate container 78e324d26576\nSuccessfully built 2f7357a0805d\n

If you don't have the alpine:3.18 image, the client will first pull the image and then create your image. Therefore, your output on running the command will look different from mine. If everything went well, your image should be ready! Run docker images and see if your image (myfirstapp) shows.
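If you want to double-check what was built, the following sketch lists the image and its layers:

docker images myfirstapp\ndocker history myfirstapp:1.0\n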

    "},{"location":"1_4_docker_tp.html#234-run-your-image","title":"2.3.4 Run your image","text":"

    The next step in this section is to run the image and see if it actually works.

    $ docker run -p 8888:5000 --name myfirstapp myfirstapp:1.0\n * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)\n

Head over to http://localhost:8888 and your app should be live. Note: If you are using Docker Machine, you may need to open up another terminal and determine the container ip address using docker-machine ip default.

    Hit the Refresh button in the web browser to see a few more cat images.
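Since the container was started with --name myfirstapp, you will need to remove it before you can reuse that name (for instance if you rebuild the image and want to run it again). A minimal sketch, after stopping it with CTRL+C:

docker rm -f myfirstapp\n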

    "},{"location":"1_4_docker_tp.html#235-dockerfile-commands-summary","title":"2.3.5 Dockerfile commands summary","text":"

    Here's a quick summary of the few basic commands we used in our Dockerfile.

    • FROM starts the Dockerfile. It is a requirement that the Dockerfile must start with the FROM command. Images are created in layers, which means you can use another image as the base image for your own. The FROM command defines your base layer. As arguments, it takes the name of the image. Optionally, you can add the Docker Cloud username of the maintainer and image version, in the format username/imagename:version.

    • RUN is used to build up the Image you're creating. For each RUN command, Docker will run the command then create a new layer of the image. This way you can roll back your image to previous states easily. The syntax for a RUN instruction is to place the full text of the shell command after the RUN (e.g., RUN mkdir /user/local/foo). This will automatically run in a /bin/sh shell. You can define a different shell like this: RUN /bin/bash -c 'mkdir /user/local/foo'

    • COPY copies local files into the container.

• CMD defines the commands that will run on the Image at start-up. Unlike a RUN, this does not create a new layer for the Image, but simply runs the command. There can only be one CMD per Dockerfile/Image. If you need to run multiple commands, the best way to do that is to have the CMD run a script. CMD requires that you tell it where to run the command, unlike RUN. So example CMD commands would be:

      CMD [\"python\", \"./app.py\"]\n\n  CMD [\"/bin/bash\", \"echo\", \"Hello World\"]\n
    • EXPOSE creates a hint for users of an image which ports provide services. It is included in the information which can be retrieved via $ docker inspect <container-id>.

    Note: The EXPOSE command does not actually make any ports accessible to the host! Instead, this requires publishing ports by means of the -p flag when using $ docker run.

• docker push pushes your image to Docker Hub, or alternatively to a private registry (note that push is a Docker CLI command, not a Dockerfile instruction)

    Note: If you want to learn more about Dockerfiles, check out Best practices for writing Dockerfiles.

    "},{"location":"1_4_docker_tp.html#3-running-cli-apps-packaged-in-docker-while-mounting-volumes","title":"3. Running CLI apps packaged in docker while mounting volumes","text":"

    Beyond serving web applications, Docker also enables the deployment of packaged applications, such as command-line interfaces and training scripts. This allows for seamless delivery of self-contained apps with bespoke installations to end-users. A particularly valuable use case is packaging machine learning environments for distributed training, facilitating efficient collaboration and scalability

To do so, we have to learn about:

• Executing command-line applications packaged inside docker images
• Passing both text and file inputs, including files not in the docker image
• Getting access to file outputs such as models

For that we will do several things:

• Write a CLI application using typer, a very useful tool for the rest of your career
• Package the CLI application, with both text and file inputs, in a docker image
• Mount volumes when running the docker image to provide it with the input files, and get access to the results from the host machine

    "},{"location":"1_4_docker_tp.html#31-a-local-cli-application","title":"3.1 A local CLI application","text":"
• Let's modify the app.py from section 2 with the following code.
    import time\nfrom pathlib import Path\nfrom typing import Annotated, Optional\n\nimport typer\n\napp = typer.Typer()\n\n\n@app.command()\ndef say_hello(name: str):\n    typer.echo(f\"Hello {name}\")\n\n\n@app.command()\ndef run_training(\n    config: Annotated[\n        Path,\n        typer.Option(\n            exists=True,\n            file_okay=True,\n            dir_okay=False,\n            writable=False,\n            readable=True,\n            resolve_path=True,\n        ),\n    ],\n    output_dir: Annotated[\n        Path,\n        typer.Option(\n            dir_okay=True,\n            writable=True,\n            readable=True,\n            resolve_path=True,\n            file_okay=False,\n        ),\n    ],\n):\n    text = config.read_text()\n    print(f\"Config file contents: {text}\")\n\n    print(f\"Running training in {output_dir}...\")\n\n    time.sleep(10)\n\n    output_dir.mkdir(exist_ok=True,parents=True)\n\n    with open(output_dir / \"results.txt\", \"w\") as f:\n        f.write(\"Training successful !\")\n\n\nif __name__ == \"__main__\":\n    app()\n
• Test the application locally using pip install typer, then python app.py say-hello {my name} or python app.py run-training --config {my config} --output-dir {somewhere}
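As a concrete sketch, assuming a dummy config file at config/config.txt and an outputs/ folder (both names are placeholders):

pip install typer\npython app.py say-hello Alice\npython app.py run-training --config config/config.txt --output-dir outputs/\n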
    "},{"location":"1_4_docker_tp.html#32-packaging-it-in-a-dockerfile","title":"3.2 Packaging it in a dockerfile","text":"

We will now package it in a Dockerfile

    • Modify the dockerfile :
    • Replace CMD [\"python3\", \"/usr/src/app/app.py\"]
    • By ENTRYPOINT [\"python3\", \"/usr/src/app/app.py\"]

    • Differences between CMD and ENTRYPOINT

    • Rebuild your docker image (maybe give it another name)
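With ENTRYPOINT, everything you append to docker run is passed as arguments to the script. A minimal sketch, where myfirstcli is just a placeholder name for the rebuilt image:

docker build -t myfirstcli:1.0 .\ndocker run --rm myfirstcli:1.0 say-hello Alice\n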

    "},{"location":"1_4_docker_tp.html#33-mounting-volumes","title":"3.3 Mounting volumes","text":"
• Now, to run the CLI, you just have to pass the arguments when running the container: docker run --rm {your image} {your args}. Try it with docker run {...} say-hello {your name}

    Warning

    once you have built your container and it works, don't rebuild it again ! We will test the volume mounting options now

• In order to pass a config file or data to your container, you need to make it available inside the container. To do that, we have to mount volumes

Create a dummy config file (config.txt) in another folder (e.g. config/) then mount it when you run the docker container. You can mount the output directory as well to be able to get your results

    docker run --rm \\\n  -v {local path to your configs}:/home/configs \\\n  -v {local path to your outputs}:/home/outputs \\\n  --workdir /home/ \\\n  {your image} \\\n  run-training --config {path to your config in DOCKER, eg /home/configs/config.txt}  \\\n  --output-dir /home/outputs/\n

Note that since you mounted volumes, you must pass the path to your config file as it appears inside the docker container, not its path in your codespace
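As a concrete sketch, assuming the config lives in ./config/config.txt in your codespace, the outputs should land in ./outputs, and the image is the placeholder myfirstcli from above:

docker run --rm \\\n  -v $(pwd)/config:/home/configs \\\n  -v $(pwd)/outputs:/home/outputs \\\n  --workdir /home/ \\\n  myfirstcli:1.0 \\\n  run-training --config /home/configs/config.txt \\\n  --output-dir /home/outputs/\ncat outputs/results.txt\n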

    Success

To be successful here, you must be able to pass a config file that is in your codespace and get the results back in your codespace, all without rebuilding the image once the first say-hello test passes

    "},{"location":"1_4_docker_tp.html#4-containers-registry","title":"4. Containers Registry","text":"

Remember Container Registries? Here are some explainers

The main container registry is Docker Hub, https://hub.docker.com/

    All docker engines that have access to the internet have access to this main hub, and this is where we pulled our base images from before

    Example, the Python Image

Google Cloud has an Artifact Registry per project, which ensures the docker images you build are accessible only to the people who have access to your project.

    We will follow this tutorial to push our images to artifact registry

• First, create a Docker repository in Artifact Registry using this tutorial, for example fch-sdd2425-artifacts-registry (that's mine, name yours with your own name). Set the repository location to the europe multi-region

    • Pushing our images requires authenticating, gcloud auth configure-docker europe-docker.pkg.dev

    • Pushing our images requires tagging them in a specific way : europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/${IMAGE}:${TAG}

    • Use the docker cli to tag your previous myfirstapp image to the right namespace

    docker tag myfirstapp:1.0 europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0

    • Upload it on container registry

    docker push europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/[IMAGE]:[TAG]

    Hint

to get your project id: PROJECT_ID=$(gcloud config get-value project 2> /dev/null). To get your artifact repository id, look at this page; you can get your project id this way as well
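Putting the previous steps together, a minimal sketch (REPO_ID is a placeholder for the repository name you created):

PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\nREPO_ID=my-artifacts-registry  # placeholder, use your own repository name\ngcloud auth configure-docker europe-docker.pkg.dev\ndocker tag myfirstapp:1.0 europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0\ndocker push europe-docker.pkg.dev/${PROJECT_ID}/${REPO_ID}/myfirstapp:1.0\n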

    • Go to your artifact registry https://console.cloud.google.com/artifacts, you should see your docker image :)
    "},{"location":"1_4_docker_tp.html#5-bonus-data-science-standardized-environment-and-mounting-volumes","title":"5. Bonus. Data Science Standardized Environment and mounting volumes","text":"

Note: This may not run in your native github codespace due to the storage available. If you encounter a storage error, run docker system prune to clean everything up

    The purpose of this tutorial is to reproduce a sort of google colab environment using docker and github codespace.

    "},{"location":"1_4_docker_tp.html#51-intro","title":"5.1 Intro","text":"

Those of us who work on a team know how hard it is to create a standardized development environment. And if you have ever updated a dependency and had everything break, you understand the importance of keeping development environments isolated.

    Using Docker, we can create a project / team image with our development environment and mount a volume with our notebooks and data.

    The benefits of this workflow are that we can:

    • Separate out projects
    • Spin up a container to onboard new employees
• Build an automated testing pipeline to confirm that dependency upgrades do not break code
    "},{"location":"1_4_docker_tp.html#52-jupyter-stack-docker-image","title":"5.2 Jupyter Stack Docker Image","text":"

For this exercise we will use a Jupyter Docker Stacks image, which is a fully configured docker image that can be used as a data science container

    Take a look at the documentation and the dockerhub repository

    To get the docker image, run

    docker pull jupyter/scipy-notebook:lab-3.5.3\n
    "},{"location":"1_4_docker_tp.html#53-get-the-algorithm-in-ml-git-in-your-virtual-machine","title":"5.3 Get the algorithm in ML git in your Virtual Machine","text":"
• From your vm, run git clone https://github.com/erachelson/MLclass.git, this should set up your ML class materials inside your VM
    "},{"location":"1_4_docker_tp.html#54-mounting-volumes-and-ports","title":"5.4 Mounting volumes and ports","text":"

Now let's run the image. This container serves a jupyter lab on port 8888, so we will need to map the host port 8888 (the one accessible from the ssh tunnel) to the container port 8888; we will use port forwarding

We will also need to make the notebooks on the VM available to the container... we will mount volumes. Your data is located in /home/${USER}/MLclass and we want to mount it at /home/jovyan/work/MLClass inside the container

    docker run --rm -it \\\n  -p 8888:8888 \\\n  -v /home/${USER}/MLclass:/home/jovyan/work/MLClass \\\n  --workdir /home/jovyan/work \\\n  jupyter/scipy-notebook:lab-3.5.3\n

    Note: this image is large, delete it afterwards using docker rmi

    Options breakdown:

    • --rm remove the container when we stop it
    • -it run the container in interactive mode
    • -p forward port from host:container
• -v mounts the MLclass folder from the VM into the container, and --workdir sets the container's working directory

You should now see a jupyter lab with MLclass accessible if you do another port mapping (from your local machine to the VM)

So, to connect to the jupyter lab, we mapped the ports: local 8888 to VM 8888, and VM 8888 to docker 8888

    We also exposed the local disk to the container

    "},{"location":"1_4_docker_tp.html#6-bonus-docker-compose","title":"6. Bonus - Docker Compose","text":"

    Docker Compose is used to manage applications and increase efficiency in container development. Configurations are defined in a single YAML file, making applications easy to build and scale. Docker Compose is often used to set up a local environment

    The tutorial below aims to introduce fundamental concepts of Docker Compose by guiding you through the development of a basic Python web application.

    Using the Flask framework, the application features a hit counter in Redis, providing a practical example of how Docker Compose can be applied in web development scenarios.

    The concepts demonstrated here should be understandable even if you're not familiar with Python.

    This is a non-normative example that just highlights the key things you can do with Compose.

    https://docs.docker.com/compose/gettingstarted/
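Once you have written the compose.yaml from that tutorial, the day-to-day commands are roughly the following (a sketch; service names and ports come from the tutorial itself):

docker compose up -d --build\ndocker compose ps\ndocker compose logs -f\ndocker compose down\n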

You can find more extensive examples here:

    https://hackernoon.com/practical-introduction-to-docker-compose-d34e79c4c2b6

    https://github.com/docker/labs/blob/master/beginner/chapters/votingapp.md

    "},{"location":"1_4_docker_tp.html#7-bonus-using-google-cloud-tools-for-docker","title":"7. Bonus - Using Google Cloud Tools for Docker","text":"

    Using codespace, you should be able to do the Hello World Dockerfile exercise except that instead of using docker build you use Google Cloud Build

    Tutorial: https://cloud.google.com/cloud-build/docs/quickstart-docker

Example command: gcloud builds submit --tag eu.gcr.io/$PROJECT_ID/{image}:{tag} .

    Help

    to get your project id: PROJECT_ID=$(gcloud config get-value project 2> /dev/null)

    Example

    Try to build the hello world app

    "},{"location":"1_4_docker_tp.html#8-bonus-going-further","title":"8. Bonus - Going further","text":"

    https://container.training/

    "},{"location":"1_5_be.html","title":"Bureau d'\u00e9tudes Cloud & Docker","text":"

    Link to slides

    "},{"location":"1_5_be.html#objectives-of-this-be","title":"Objectives of this BE","text":"

This Bureau d'\u00e9tudes (BE, for short) will guide you through the essential notions you need to be able to work with cloud computing and docker.

    We will go through several steps

    • Working in a remote environment (in a GitHub CodeSpace, inside a VM)
    • Creation and ssh connection to virtual machine instances
    • Using managed storage capabilities (gcs://)
    • Creating your own docker images
    • Exchanging docker images through a Container Registry
    • Pulling and running docker images created by your teammates

    In particular, this workflow:

    Warning

    Please read all the text in the question before executing the step-by-step instructions because there might be help or indications after the instructions.

    "},{"location":"1_5_be.html#how-to-run-this-be","title":"How to run this BE","text":"

The best way to run this BE is to set up a Github Codespace VM and install the google cloud sdk. Refer to the previous hands-on to learn more

    We will be using the gcloud CLI for the following:

    • Create a GCE Virtual Machine
    • Connect to SSH with port forwarding to said machine

For the rest of this walkthrough, when it says \"from your local machine\", it means \"from your github codespace\"

    If it is specified \"inside the VM\", this means that you should run it inside the GCE VM, which means you need to connect to it using an SSH tunnel first...

    \ud83d\ude4f\ud83c\udffb Use Google Chrome without any ad blockers if you have any issues, or use the local VSCode + CodeSpace extension

    Warning

    \u26a0\ufe0f Normally you will do everything from your browser, connected to the github codespace, so it should work \u26a0\ufe0f if you have any issues, switch your wi-fi connection between eduroam (preferred), isae-edu or a 4G hotspot

    "},{"location":"1_5_be.html#team-composition-setup","title":"Team composition & Setup","text":"

You should be in teams of 5, however this will work with a minimum of 2 people.

    Each team member picks a different cute mascot and remembers it:

    • \ud83d\udc08 cat
    • \ud83d\udc15 dog
    • \ud83d\udc7d (baby) yoda
    • \ud83e\udd89 owl
    • \ud83d\udc3c panda

Find a group name, because you will need it for the next steps

One of the team members will add the others to their GCP project so that everyone can collaborate.

    Designate a \"project manager\" (the person who is the most comfortable with the google cloud platform UI). That person will have the hard task of giving access to his/her GCP project to the other team members to enable collaboration.

    This means that the project of the \"team leader\" will be billed a little more for the duration of this BE, so please be kind with the project and apply good cloud hygiene :)

    Rest assured, this will not cost very much !

    How to do that ?

    Go to the \"IAM & Admin / IAM\" section of the Google Cloud Console, then locate the \"grant access\",

Grant access to each of your teammates using the \"Editor\" role (Basic -> Editor)

    Here are some screenshots to help you

    "},{"location":"1_5_be.html#1-build-ship-run-deploy-as-a-team","title":"1 - Build, Ship, Run (Deploy) as a Team","text":""},{"location":"1_5_be.html#11-build","title":"1.1 - Build","text":""},{"location":"1_5_be.html#111-start-development-environment-github-codespace","title":"1.1.1 - Start Development Environment (Github Codespace)","text":"
    • Launch your Github Codespaces instance from the preconfigured repository https://github.com/fchouteau/isae-cloud-computing-codespace
    • Ensure that the google cloud sdk is installed (it should be done automatically) and configured to the project that you were given access to (run gcloud init like last time)
    "},{"location":"1_5_be.html#112-get-the-necessary-resources-from-google-cloud-storage","title":"1.1.2 - Get the necessary resources from Google Cloud Storage","text":"

    From your github codespace,

    The resources are located at the URI gs://fchouteau-isae-cloud/be/${MASCOT},

    Your ${MASCOT} name is either:

    • cat
    • dog
    • owl
    • panda
    • yoda

    I advise you to export MASCOT=.... to remember it :)

    ONLY DOWNLOAD your mascot resources (no cheating ! this will only cause confusion later)

    Download them to your instance using the gcloud cli (refer to your previous work for more information)

    Hint

    gsutil -m cp -r {source} {destination}\n
    Remember that google storage URIs always begin with gs://
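For example, if your mascot were yoda, the download could look like this (adapt the value to your own mascot):

export MASCOT=yoda\ngsutil -m cp -r gs://fchouteau-isae-cloud/be/${MASCOT} .\n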

    Go to (cd) the folder where you downloaded your resources

    You should see a file structure like this

    fchouteau@be-cloud-mascot:~/be$ tree yoda  -L 2\nyoda\n\u251c\u2500\u2500 app.py\n\u251c\u2500\u2500 AUTHOR.txt\n\u251c\u2500\u2500 Dockerfile\n\u251c\u2500\u2500 favicon.ico\n\u251c\u2500\u2500 imgs\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 1.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 2.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 3.gif\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 4.gif\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 5.gif\n\u2514\u2500\u2500 template.html.jinja2\n\n1 directory, 10 files\n
    "},{"location":"1_5_be.html#113-build-your-docker-image","title":"1.1.3 - Build your docker image","text":"

    Question

    • Look at the Dockerfile (cat Dockerfile), what does it seem to do ?
    • Look at app.py (cat app.py). What is Flask ? What does it seem to do ?
    • Edit the file AUTHOR.txt to add your name instead of the placeholder
    • Refer to your previous work to build the image

    Danger

    On which port is your flask app running ? (cat Dockerfile) Note it carefully ! You will need to communicate it to your teammate :)

    • When building the image, name it appropriately... like eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0 !
    Hint

    to get your project id:

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\n
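A minimal build sketch using that naming convention (GROUPNAME and MASCOT are shell variables you set yourself):

PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker build -t eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0 .\n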

    • now if you list your images you should see it !
    REPOSITORY                                      TAG                 IMAGE ID            CREATED             SIZE\neu.gcr.io/{your project name}/{your-app}    1.0                 d1c5993848bf        2 minutes ago       62.1MB\n

    Question

    Describe concisely to your past self what is a Docker Image

    "},{"location":"1_5_be.html#12-ship","title":"1.2 - Ship","text":""},{"location":"1_5_be.html#121-push-your-docker-image-in-the-shared-container-registry","title":"1.2.1 - Push your Docker image in the shared Container Registry","text":"
    • One of the team member must first create a shared Artifact Registry

    • Now push your image on the shared container registry

    • Help your team mates so that everybody can build his/her Docker Image
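As a sketch, pushing with the naming convention from the build step could look like this (you may need to authenticate docker against the registry first, e.g. gcloud auth configure-docker, and the project must be the shared one):

docker push eu.gcr.io/${PROJECT_ID}/webapp-gif:${GROUPNAME}-${MASCOT}-1.0\n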

    Question

Describe succinctly to your past self what a Container Registry is

    "},{"location":"1_5_be.html#13-run-deploy","title":"1.3 - Run (deploy)","text":""},{"location":"1_5_be.html#131-create-google-compute-engine-vm","title":"1.3.1 - Create Google Compute Engine VM","text":"

    Each team member creates a separate GCE Instance (Virtual Machine) on the same project,

    Here, you will create a Google Compute Engine instance, preconfigured with everything you need,

    If you use the google cloud CLI (from your codespace), you can use this

    First, set a variable with the name of your instance,

    export INSTANCE_NAME=\"be-cloud-mascot-{yourgroup}-{yourname}\" # Don't forget to replace values !\n

    Then create your VM

    gcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=\"TERMINATE\" \\\n        --scopes=\"storage-rw\" \\\n        --machine-type=\"n1-standard-1\" \\\n        --boot-disk-size=\"50GB\" \\\n        --boot-disk-type=\"pd-standard\"\n

    If you have an issue with quota, use any of europe-west4-{a,b,c,d} or europe-west1-{b,c,d}

    If you use the web interface, follow this


    Question

    Describe concisely to your past self what is a Virtual Machine and what is Google Compute Engine

    "},{"location":"1_5_be.html#132-connect-using-ssh-to-the-instance","title":"1.3.2 - Connect using SSH to the instance","text":"

    If you are using the google cloud sdk from github codespace, you can connect to ssh using the usual command.

    Tunnel the following ports to your local machine:

    • 8080: This is reserved for a jupyter lab session by default, it makes it easy to see & edit text
• 8081: You will need to run containers and expose them on a port
    Hint
    gcloud compute ssh {user}@{instance} -- \\\n    -L {client-port}:localhost:{server-port} \\\n    -L {client-port-2}:localhost:{server-port-2}\n
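For instance, with the instance name you exported earlier, the tunnel could look like this (a sketch; adapt the zone if you used a different one):

gcloud compute ssh $INSTANCE_NAME --zone=europe-west1-b -- \\\n    -L 8080:localhost:8080 \\\n    -L 8081:localhost:8081\n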

    Go to your browser and connect to http://localhost:8080, you should be in a jupyter lab where you can access a terminal, a text editor etc...

    Question

    Where is this jupyter lab hosted ? Describe concisely what is a SSH Tunnel and what is port forwarding

    "},{"location":"1_5_be.html#133-pull-docker-images-from-your-teammate","title":"1.3.3 - Pull Docker Images from your teammate","text":"

You should now be inside your VM,

    Question

    How to check that you're inside your VM ? On your terminal you should see user@hostname at the beginning. Hostname should be the name of your VM

    • Select another mascot and pull the corresponding docker image from the registry

    • List the docker images you have docker images.

    "},{"location":"1_5_be.html#134-run-docker-containers-from-their-docker-images","title":"1.3.4 - Run Docker Containers from their Docker Images","text":"
• Run your container while mapping the correct port to your VM's port 8081. Which port is it ? Well, ask the person who built the image.

    • When running the container, setup the USER environment variable to your name !

    Hint

the port is not necessarily the same as yours; if you don't set the username, it will come back to bite you later ;)
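A sketch of what the run command could look like; {mascot port} is the port exposed by your teammate's Dockerfile (ask them), and the image name follows the convention used when it was pushed:

docker run --rm -d \\\n  -p 8081:{mascot port} \\\n  -e USER=\"my name\" \\\n  eu.gcr.io/${PROJECT_ID}/webapp-gif:{groupname}-{teammate mascot}-1.0\n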

    "},{"location":"1_5_be.html#135-display-the-results-share-them","title":"1.3.5 - Display the results & share them","text":"
    • You just launched a webapp on the port 8081 of your remote instance.

• If you have a ssh tunnel directly from your laptop, ensure that you made a tunnel from your port 8081 to any port of your machine, then go to http://localhost:(your port) in your browser. The resulting webpage should appear

    • If you are using github codespace, open web preview on port 8081 (you should have a tunnel running between your github codespace and your GCE instance)

    • You can also publicly share the codespace preview link so that other people can see your results

    Checklist

• The webpage should display the mascot you chose to run
    • The webpage should display the name of the author (not you)
    • The webpage should display your name

    Bug

    If any of the three item above are missing, find the bug and solve it :)

    Example

    Try to refresh the webpage to make more gifs appear

    Share your result on slack

    "},{"location":"1_5_be.html#14-cleanup-the-gcp-project","title":"1.4. Cleanup the GCP project","text":"
    • Remove your VMs (DELETE them)
    • Remove images from the container registry
    "},{"location":"1_5_be.html#15-yay","title":"1.5. Yay !","text":"

    Success

\ud83c\udf89 you have successfully finished the mandatory part of the BE. You know how to manipulate the basic notions around cloud computing and docker so that you won't be completely lost when someone talks about it

    Continue the BE below (you can do it alone or by group of 2 or 3) to discover more nice things !

    "},{"location":"1_5_be.html#2-another-deployment","title":"2 - Another deployment","text":""},{"location":"1_5_be.html#21-lets-discover-streamlit","title":"2.1 - Let's discover Streamlit","text":"

    We will now introduce streamlit, which is a very nice tool to build quick webapps in python !

    In this TP you will build your first interactive webapp in python and package it in a container.

    First, look at this video,


    Then, take a look at an introduction to streamlit and the streamlit application gallery

    Question

    Can you describe what exactly is streamlit ? Could you find any way it could be useful to you ?

    "},{"location":"1_5_be.html#22-your-first-streamlit-application","title":"2.2 Your first streamlit application","text":"

    Take a look at the code below,

import streamlit as st\nfrom streamlit_image_comparison import image_comparison\nimport cv2\n\nst.set_page_config(\"Webb Space Telescope vs Hubble Telescope\", \"\ud83d\udd2d\")\n\nst.header(\"\ud83d\udd2d J. Webb Space Telescope vs Hubble Telescope\")\n\nst.write(\"\")\n\"This is a reproduction of the fantastic [WebbCompare](https://www.webbcompare.com/index.html) app by [John Christensen](https://twitter.com/JohnnyC1423). It's built in Streamlit and takes only 10 lines of Python code. If you like this app, please star [John's original repo](https://github.com/JohnEdChristensen/WebbCompare)!\"\nst.write(\"\")\n\nst.markdown(\"### Southern Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/southern_nebula_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/southern_nebula_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\n\nst.markdown(\"### Galaxy Cluster SMACS 0723\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/deep_field_700.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/deep_field_700.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Carina Nebula\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/carina_2800.png\",\n    img2=\"https://www.webbcompare.com/img/webb/carina_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n\nst.markdown(\"### Stephan's Quintet\")\nimage_comparison(\n    img1=\"https://www.webbcompare.com/img/hubble/stephans_quintet_2800.jpg\",\n    img2=\"https://www.webbcompare.com/img/webb/stephans_quintet_2800.jpg\",\n    label1=\"Hubble\",\n    label2=\"Webb\",\n)\n

    Question

    Can you describe, by reading the documentation, what does the code do ?

    "},{"location":"1_5_be.html#23-local-deployment-in-codespace","title":"2.3 - Local deployment in codespace","text":"

    First, we will install in the codespace the dependencies for our application,

pip install streamlit opencv-python-headless streamlit-image-comparison

    Then create a file streamlit_jswt.py and copy/paste the code above.

    Then execute it streamlit run streamlit_jswt.py

    This will launch the application on the port 8501 (by default) of our codespace. You can connect to it as usual.

    \ud83e\udd29 Nice, isn't it ?

    Now you can quit the server.

    "},{"location":"1_5_be.html#24-a-more-complex-application","title":"2.4 - A more complex application","text":"

We will run and package a more complex application, one that is a lot more useful for your deep learning class

    Clone the following repository git clone https://github.com/fchouteau/isae-demo-streamlit-activation-functions.git

    cd to the directory cd isae-demo-streamlit-activation-functions then as last time, install the dependencies pip install -r requirements.txt then run the application streamlit run app.py

    You can visualize it as last time. This should be quite useful for you given you just left the Deep Learning Class !

    "},{"location":"1_5_be.html#25-transform-application-into-docker-image","title":"2.5 - Transform application into docker image","text":"

    Refer to the previous TP where we built a website to convert what we just did into a docker image.

In short, create a Dockerfile that inherits from FROM python:3.10, copies all the app files COPY ./ /app/, installs the dependencies RUN pip install -r /app/requirements.txt, exposes the port EXPOSE 8501, then runs the app as the entrypoint CMD [\"python\", \"-m\", \"streamlit\", \"run\", \"app.py\"].

    You should be able to do it yourself, but if you need help, here's what your Dockerfile looks like :

    Solution
      FROM python:3.10\n\n  COPY ./ /app/\n  RUN pip install -r /app/requirements.txt\n\n  EXPOSE 8501\n\n  WORKDIR /app/\n\n  CMD [\"python\", \"-m\", \"streamlit\", \"run\", \"app.py\"]\n

    Then build your image, and run it locally (using the correct port forwarding which is 8501)

    Solution
      # build\n  docker build -t eu.gcr.io/sdd2324/streamlit-fch:1.0 -f Dockerfile . \n  # run\n  docker run --rm -p 8501:8501 eu.gcr.io/sdd2324/streamlit-fch:1.0 # change this name to yours\n

    Once you know it works locally, tag it and push it to our shared container registry

    Solution
      # push to registry\n  docker push eu.gcr.io/sdd2324/streamlit-fch:1.0 # change this name to yours\n
    "},{"location":"1_5_be.html#26-deployment-in-a-vm","title":"2.6 - Deployment in a VM","text":"

We will now create yet another VM to deploy our application. This time, we will deploy our container directly to the VM without connecting to it over ssh,

    Don't forget to change the instance name & zone according to what you did previously.

Take note of the --container-image flag and change it to the name of the image you just pushed

    gcloud compute instances create-with-container fch-streamlit-demo \\\n    --project=[your project] \\\n    --zone=europe-west1-b \\\n    --machine-type=n1-standard-1 \\\n    --image=projects/cos-cloud/global/images/cos-stable-109-17800-66-27 \\\n    --boot-disk-size=10GB \\\n    --boot-disk-type=pd-standard \\\n    --container-image=[your image] \\\n    --container-restart-policy=always\n

Compared to previously, note that we explicitly specify a container to deploy to the VM, and we don't use ubuntu but a container-optimized OS.

    "},{"location":"1_5_be.html#27-publish-the-results-on-the-web","title":"2.7 - Publish the results on the web","text":"

    First run this command in your codespace. This will expose the port 8501 to the web

    gcloud compute --project=[your project] firewall-rules create open-8501 --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8501 --source-ranges=0.0.0.0/0\n
    Then, locate the public IP of your VM using the google cloud console.
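Alternatively, you can get it from the CLI (a sketch; adapt the instance name and zone to yours):

gcloud compute instances describe fch-streamlit-demo --zone=europe-west1-b \\\n  --format='get(networkInterfaces[0].accessConfigs[0].natIP)'\n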

    Finally, take your phone (it won't work over ISAE wifi, maybe on eduroam) and connect to its port 8501, http://ip-of-the-machine:8501

    \ud83e\uddd0 The app should appear !

    We just deployed a webapp written in python to a public website :)

    "},{"location":"1_5_be.html#28-cleanup","title":"2.8 - Cleanup","text":"

    As usual, cleanup your resources. Delete the GCE VM.

    "},{"location":"1_5_be.html#29-yay","title":"2.9 - Yay !","text":"

    Success

\ud83c\udf7e you have successfully finished all parts of the BE. You know how to manipulate the basic notions around cloud computing and docker so that you won't be completely lost when someone talks about it

    Finish the previous hands-on (cloud & docker) if you have time. In particular, take a look at the docker-compose section.

    "},{"location":"1_5_be.html#3-im-finished-now-im-bored","title":"3 - I'm finished, now I'm bored !","text":"

    I advise you to ensure you've done this part of the previous GCP hands-on

    "},{"location":"1_6_conclusion.html","title":"Recap'","text":"

    Link to slides

    "},{"location":"1_7_readings.html","title":"Readings","text":""},{"location":"1_7_readings.html#about-cloud-computing","title":"About Cloud Computing","text":"
    • Buyya, R., Srirama, S. N., Casale, G., Calheiros, R., Simmhan, Y., Varghese, B., ... & Toosi, A. N. (2018). A manifesto for future generation cloud computing: Research directions for the next decade. ACM computing surveys (CSUR), 51(5), 1-38.

    • On sustainable data centers and energy use (intro)

    • The NIST Definitions of Cloud Computing

    • Open Data: Open Sentinel 2 archive on AWS

    • Environmental Impact of Cloud vs On Premise

    • Environmental Impact of cloud vs on-premise medium blog post

    • Paper from Natural Resources Defense Council on Cloud vs On-Premise

    • Anecdotes about Cloud Computing

    "},{"location":"1_7_readings.html#about-containers","title":"About Containers","text":"
    • Docker whitepaper: Docker and the way of the Devops

    • What exactly is Docker ? Simple explanation from a medium blog post

    "},{"location":"1_7_readings.html#about-orchestration","title":"About Orchestration","text":"
    • Verma, A., Pedrosa, L., Korupolu, M., Oppenheimer, D., Tune, E., & Wilkes, J. (2015, April). Large-scale cluster management at Google with Borg. In Proceedings of the Tenth European Conference on Computer Systems (pp. 1-17).

    • Kubernetes Comic to learn about Kubernetes in a fun way https://cloud.google.com/kubernetes-engine/kubernetes-comic

    "},{"location":"1_8_deployment.html","title":"Intro to Deployment & BE","text":"

    Link to slides

    "},{"location":"1_8_deployment_tp.html","title":"Deploy your ML model into production","text":""},{"location":"1_8_deployment_tp.html#objectifs","title":"Objectifs","text":"

The goal of this TP is to convert this notebook into two containerized services:

• a back-end: a server that receives images and returns predictions,
• a front-end that lets you send images to the model and display the predictions on those images.

To save time, the Dockerfiles have already been written and are ready to be tested and deployed. If you want to dive into the details and write the code yourself, you can refer to the long version of this TP below (which is not up to date).

We will therefore cover:

• Building a \"backend\" docker image that serves the model behind an \"API\"
• Interacting with this docker container
• Building a \"frontend\" docker image that provides a UI to interact more easily with the backend
• docker-compose to launch multi-container applications
• Deploying the backend on GCP
• The final test

We place ourselves in a \"microservices\" context where the front-end and the back-end are two separate containers. It would have been possible to build a single container holding both (a \"monolith\"). A microservices architecture has some advantages (modularity, maintenance) but is more complex to set up.

    "},{"location":"1_8_deployment_tp.html#1-mise-en-place-du-projet-google-cloud-platform","title":"1 - Mise en place du projet Google Cloud Platform","text":"

Select your personal Google Cloud Platform project

    "},{"location":"1_8_deployment_tp.html#2-demarrage-du-github-codespace","title":"2 - D\u00e9marrage du GitHub Codespace","text":"

If you have already started a GitHub Codespace previously, you can relaunch it through the usual interface

Otherwise, start a GitHub Codespace from the repository https://github.com/fchouteau/isae-cloud-computing-codespace

You must use a codespace created from this repository because it contains everything you need for this TP.

Normally, once the codespace is launched, you should get a vscode interface with two folders, one of which is named tp-deployment. Go to that folder,

It contains several resources: the frontend, which contains what you need to build the UI, the backend, which contains what you need to build the server, and some test resources.

    "},{"location":"1_8_deployment_tp.html#3-construction-et-tests-du-backend","title":"3 - Construction et tests du backend","text":"

    The README.md of the backend folder details how the server and its API are built (this used to be left as an exercise). We use FastAPI, a framework for building web applications.

    The main code lives in app.py. We declare "routes" (ways of interacting with the server) and assign functions to them.

    For example, you can look at the /predict route, which is associated with the function of the same name.

    @app.post(\n    \"/predict\",\n    description=\"Send a base64 encoded image + the model name, get detections\",\n    response_description=\"Detections + Processing time\",\n    response_model=Result,\n)\n

    This function runs inference on the image passed in the REST request to the /predict route.

    To better illustrate how to interact with this server, we will run it locally, using the docker image that has already been built.

    Note

    You can rebuild the docker image yourself by running

    docker build -f Dockerfile -t eu.gcr.io/third-ridge-138414/yolo-v5:1.2 .

    Run the following command: docker run --rm -p 8000:8000 eu.gcr.io/third-ridge-138414/yolo-v5:1.2

    This starts a container from the backend docker image, exposing port 8000.

    Connect to port 8000 of the codespace. You should see an almost empty page that says "YOLO-V5 WebApp created with FastAPI".

    We will now look at the documentation of the application. It is automatically generated from the code of app.py by the FastAPI framework and is available on the /docs route. For more information, see here.

    So connect to the /docs route by appending it to the URL of the codespace.

    This web page describes the available routes and how to interact with them, as well as the input and output formats. It is the documentation of the API, and when you interact with the server it is the only thing you need. This kind of API documentation is standardized.

    We will now interact with this server.

    In the backend folder there is a python file test_webapp.py. It automatically sends the right requests to the server. Run it (python test_webapp.py); you should see tests matching the code, as well as the predictions of the cats on the image cats.png.
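
    For reference, a minimal request of this kind can be written with the requests library. This is only a sketch under assumptions: the payload fields follow the Input schema shown in the long version of this TP below (a model name plus a base64-encoded image), and cats.png is assumed to sit next to the script.

    import base64\nimport requests\n\nURL = \"http://localhost:8000\"  # the backend exposed by the running container\n\n# Check that the server is alive via the /health route\nprint(requests.get(\"{}/health\".format(URL)).text)\n\n# Send a base64-encoded image + a model name to the /predict route\nwith open(\"cats.png\", \"rb\") as f:\n    payload = {\"model\": \"yolov5s\", \"image\": base64.b64encode(f.read()).decode(\"utf-8\")}\n\nresponse = requests.post(\"{}/predict\".format(URL), json=payload)\nprint(response.json())  # detections + processing time\n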

    Leave the terminal with the running container open for now.

    "},{"location":"1_8_deployment_tp.html#4-construction-et-tests-du-frontend","title":"4 - Construction et tests du frontend","text":"

    As you will have noticed, interacting with the backend through scripts is not very intuitive; we would like to visualize the predictions more easily, apply thresholds on object confidence, and so on.

    For that we will use a streamlit application (note: for an introduction to streamlit, see section 6 of the BE).

    In your codespace, open a new terminal and go to the frontend folder. Here again, the app.py file contains the code of the streamlit application. It retrieves an image that you upload (any image of your choice) and sends it to the server whose IP you specify in the box at the top left.

    Let's launch this application:

    docker run --rm -p 8501:8501 --network=\"host\" eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5

    Go to port 8501 of your github codespace.

    The first step is to fill in the address (URL) of the backend. To check that you can reach the server, click on the "IS ALIVE" button. This button (see the code in app.py) sends a request to the /health route to verify that the server is alive.

    By default, the server URL is http://localhost:8000, which looks right since we started a container exposed on port 8000.

    You can now test the server and, if it works, upload an image of your choice with the upload button and launch a prediction. This uploads the image to the frontend, sends a POST request to http://url-of-the-server/predict, then retrieves the results (the json) and interprets them correctly.

    Note that we started the frontend with the --network="host" argument. This gives the container access to localhost (it is placed on the same network as the host). Without this argument, the containers live on separate networks and cannot see each other.

    You can now stop both containers (backend and frontend).

    "},{"location":"1_8_deployment_tp.html#5-docker-compose","title":"5 - docker-compose","text":"

    To simplify this multi-container deployment step, which can be tedious (imagine an application with 4 or 5 containers!), a solution called docker-compose exists. See an introduction to docker-compose.

    This solution launches a set of containers and puts them on the same network, in a declarative way: the container setup is described in a configuration file.

    Our docker-compose.yml is in the tp-deployment folder.

    version: '3'\nservices:\n  yolo:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5:1.2\"\n    ports:\n      - \"8000:8000\"\n    hostname: yolo\n  streamlit:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5\"\n    ports:\n      - \"8501:8501\"\n    hostname: streamlit\n

    This configuration file states that, on startup, the frontend and the backend are launched simultaneously, expose their respective ports, and can communicate with each other through their "hostnames".

    We will launch our application this way by running the command docker-compose up

    See the docker-compose docs: https://docs.docker.com/compose/reference/

    This directly starts our two services, which you can find on ports 8000 (backend) and 8501 (frontend).

    As before, you can connect to the frontend on port 8501 of the codespace to interact directly with the backend. The small difference is that the backend is now reachable at http://yolo:8000 rather than http://localhost:8000, because docker-compose named the containers with the specified hostnames (and put them on the same network).

    Once you have played with your deployment, we will deploy the backend on a server on Google Cloud.

    "},{"location":"1_8_deployment_tp.html#6-deploiement-du-backend-sur-une-vm-google-compute-engine","title":"6 - Deploiement du backend sur une VM Google Compute Engine","text":"

    We will now start a Google Compute Engine VM instance and deploy a container directly on it. You have already seen this method in the streamlit section of the BE.

    Don't forget to connect your github codespace to your gcp project using gcloud init

    Retrieve your gcp project_id through the interface or with the following variable: PROJECT_ID=$(gcloud config get-value project 2> /dev/null)

    Then we will create a VM and deploy a container on it in one step. Note that this time we use an OS dedicated to hosting containers (not meant for ssh connections) rather than ubuntu as before.

    gcloud compute instances create-with-container fch-yolo-backend \\\n    --project=${PROJECT_ID} \\\n    --zone=europe-west1-b \\\n    --machine-type=n1-standard-2 \\\n    --image=projects/cos-cloud/global/images/cos-stable-109-17800-66-27 \\\n    --boot-disk-size=20GB \\\n    --boot-disk-type=pd-standard \\\n    --container-image=eu.gcr.io/third-ridge-138414/yolo-v5:1.2 \\\n    --container-restart-policy=always\n

    Note: if you are using your own GCP project, you must open port 8000 to the public internet in order to reach the service. Use this command:

    gcloud compute --project=${PROJECT_ID} firewall-rules create open-8000 --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8000 --source-ranges=0.0.0.0/0 \n
    "},{"location":"1_8_deployment_tp.html#7-tests","title":"7 - Tests","text":"

    We will now check that our backend is properly deployed. To do so, relaunch the front-end and change the IP to the IP of the virtual machine launched just before.

    • relaunch the frontend docker: docker run --rm -p 8501:8501 eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.5
    • connect to port 8501 of the github codespace, as before, and change the backend IP so that it matches the remote server, i.e. the public IP of your GCP VM (still on port 8000)
    • if you send a request, it is now forwarded to the backend hosted on GCP!
    "},{"location":"1_8_deployment_tp.html#8-yay","title":"8. Yay !","text":"

    Success

    \ud83c\udf7e And there you go, you have deployed your first model on the cloud!

    Warning

    Don't forget to delete your GCE VM once the work is done.

    "},{"location":"1_8_deployment_tp.html#9-bonus-passer-a-lechelle","title":"9. BONUS - Passer \u00e0 l'\u00e9chelle","text":"

    We have just deployed a model on a single machine.

    Our deployment is still missing a few things:

    • A domain name
    • The ability to scale out to several machines, or down to zero machines when there is no demand
    • Update management: how do we deploy a new version of the application?
    • Routing of traffic to the right instance

    We will therefore look at a "managed" container deployment solution (also called serverless / "Container as a Service"): Google Cloud Run. To learn more, read the introduction to the service.

    The goal is to deploy our container, which is a service, without managing the infrastructure or the routing.

    We will roughly follow the steps of the tutorial.

    Hint

    To test scaling, it is recommended to form a group, make a single deployment, and then all try to use the same service (the same URL) once it is deployed.

    • Go to the GCR page
    • Select "deploy a container"
    • Enter the URL of the container to deploy: eu.gcr.io/third-ridge-138414/yolo-v5:1.2
    • Enter a service name
    • Select a europe zone (west1, west4, west9)
    • Allow unauthenticated requests
    • Ingress control: all
    • In the container settings, select port 8000 and allocate 2 GB of RAM
    • Set a 10s timeout and a maximum of 5 requests per instance
    • Set a maximum of 5 instances
    • Done!

    Your service should now be created. Once it has started, an instance is spun up (you have no control over the infrastructure) and the prediction endpoint is reachable at the URL of the service.

    Relaunch the front end from your codespace and enter the URL of the service. Run a prediction.

    Success

    \ud83c\udf7e And there you go, you have deployed your first model on the cloud!

    If several people send simultaneous requests to the service with different images, the service may "scale out" automatically.
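
    If you want to generate simultaneous requests yourself, here is a small sketch using a thread pool; the service URL and the image path are placeholders to adapt to your own deployment.

    import base64\nimport requests\nfrom concurrent.futures import ThreadPoolExecutor\n\nSERVICE_URL = \"https://your-cloud-run-service-url\"  # placeholder: the URL of the deployed service\n\nwith open(\"cats.png\", \"rb\") as f:\n    payload = {\"model\": \"yolov5s\", \"image\": base64.b64encode(f.read()).decode(\"utf-8\")}\n\ndef call_predict(_):\n    # each call may land on a different Cloud Run instance if the service scales out\n    return requests.post(\"{}/predict\".format(SERVICE_URL), json=payload, timeout=60).status_code\n\nwith ThreadPoolExecutor(max_workers=10) as pool:\n    print(list(pool.map(call_predict, range(20))))  # a list of HTTP status codes, 200 if all went well\n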

    To monitor the traffic of your service you can use:

    • either the web page of the google cloud run service
    • or the Metrics Explorer, selecting the metric Cloud Run Revision - Container - Instance Count. You can also add this metric as a widget of the gcr dashboard.

    Hint

    Normally a service URL has been posted on slack, you can try it...

    Warning

    Don't forget to delete your google cloud run service once the work is done.

    "},{"location":"1_8_deployment_tp_long.html","title":"Deployment : Deploy your ML model in production (Version Longue de janvier 2023)","text":""},{"location":"1_8_deployment_tp_long.html#objectives","title":"Objectives","text":"

    Your first ML model in production!

    • A model behind a Restful API, packaged in a docker image
    • A frontend using streamlit, packaged in a docker image
    • Deploy a multi-container application using docker compose
    • Deploy the model in the docker image
    • Send it to your friends!

    Let's look at this notebook.

    It performs the following operations:

    • Load a model
    • Load an image
    • Detect the "objects" in the image
    • Draw the detections on the image
    • Display the result

    The goal is to convert this notebook into two applications:

    • One that "serves" the predictions of a model (the server)
    • One that lets a user easily interact with the model by uploading their own image (the "client")

    We will develop all of this in the development environment (codespaces),

    then deploy the model in the GCP environment.

    "},{"location":"1_8_deployment_tp_long.html#team-composition","title":"Team Composition","text":"

    It is better to work in pairs so you can help each other :)

    "},{"location":"1_8_deployment_tp_long.html#configuration-du-codespace","title":"Configuration du codespace","text":"

    We will use github codespaces as the development environment.

    Start again from https://github.com/github/codespaces-blank

    Then configure this codespace with the google cloud sdk and configure the isae-sdd project.

    Hint

    # Reminder: installing the google cloud sdk\n# https://cloud.google.com/sdk/docs/install#linux\ncurl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-416.0.0-linux-x86_64.tar.gz\ntar -xf google-cloud-cli-416.0.0-linux-x86_64.tar.gz\n./google-cloud-sdk/install.sh\n# Type yes to add to path!\nexport PATH=./google-cloud-sdk/bin:$PATH\ngcloud init\n# login and copy the token\n# configure isae-sdd then compute zone 17\ngcloud auth configure-docker\n

    See the previous TPs.

    Now, from this codespace, open a terminal and retrieve the following files:

    gsutil cp -r gs://fchouteau-isae-cloud/deployment/* .\n

    Hint

    If you run out of storage during the TP, run docker system prune to clean the docker cache.

    "},{"location":"1_8_deployment_tp_long.html#1-converting-a-prediction-notebook-into-a-webapplication","title":"1 - Converting a prediction notebook into a webapplication","text":"

    Go to the newly created model folder.

    "},{"location":"1_8_deployment_tp_long.html#objectif","title":"Objectif","text":"

    Package a machine learning model behind a web application so that it can be deployed on the web and serve predictions to users.

    The model: an object detector for "standard" photographs, supposed to run in real time, that outputs "bounding boxes" around the objects detected in images.

    Note: the paper is worth reading https://pjreddie.com/media/files/papers/YOLOv3.pdf

    We use the version available on torchhub https://pytorch.org/hub/ultralytics_yolov5/ which corresponds to the following repository https://github.com/ultralytics/yolov5

    Here is a short explanation of the history of YOLO https://medium.com/towards-artificial-intelligence/yolo-v5-is-here-custom-object-detection-tutorial-with-yolo-v5-12666ee1774e

    We propose to wrap 3 versions of the model (S, M, L), which are 3 more or less complex variants of YOLO-V5, so that performance and results can be compared.
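
    As a reminder of what the prediction notebook does, here is a minimal sketch based on the torchhub page linked above (the image path is an assumption):

    import torch\nfrom PIL import Image\n\n# Load one of the three YOLO-V5 variants (yolov5s / yolov5m / yolov5l) from torchhub\nmodel = torch.hub.load(\"ultralytics/yolov5\", \"yolov5s\", pretrained=True).eval()\n\nimage = Image.open(\"cats.png\")  # assumed test image\nresults = model(image, size=640)    # inference, includes NMS\nprint(results.names)                # class names\nprint(results.xyxy[0].numpy())      # columns: left, top, right, bottom, confidence, class_index\n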

    "},{"location":"1_8_deployment_tp_long.html#deroulement","title":"D\u00e9roulement","text":"
    • Turn a prediction notebook into a "WebApp" by filling in app.stub.py and renaming it to app.py
    • Package the application as a docker image
    • Test the docker image locally
    • Upload the docker image to Google Container Registry
    "},{"location":"1_8_deployment_tp_long.html#developpement-de-apppy","title":"D\u00e9veloppement de app.py","text":"

    Let's look at app.stub.py (which we will rename to app.py):

    import base64\nimport io\nimport time\nfrom typing import List, Dict\n\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n\nclass Input(BaseModel):\n    model: str\n    image: str\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n# !!!! FILL ME\ndef parse_predictions(prediction: np.ndarray, classes: [str]) -> List[Detection]:\n    raise NotImplementedError\n\n\n# !!!! FILL ME\ndef load_model(model_name: str):\n    \"\"\"\"\"\"\n    raise NotImplementedError\n\n\nMODEL_NAMES = [\"yolov5s\", \"yolov5m\", \"yolov5l\"]\n\napp = FastAPI(\n    title=\"NAME ME\",\n    description=\"\"\"\n                DESCRIBE ME\n                \"\"\",\n    version=\"1.0\",\n)\n\n# !!!! FILL ME\n# This is a dictionnary that must contains a model for each key (model names), fill load model\n# example: for model_name in MODEL_NAMES: MODELS[model_name] = load_model(model_name)\n# You can also lazily load models only when they are called to avoid holding 3 models in memory\nMODELS = ...\n\n\n@app.get(\"/\", description=\"return the title\", response_description=\"FILL ME\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/health\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\"/models\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=List[str])\ndef models() -> [str]:\n    return MODEL_NAMES\n\n\n@app.post(\"/predict\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=Result)\ndef predict(inputs: Input) -> Result:\n\n    # get correct model\n    model_name = inputs.model\n\n    if model_name not in MODEL_NAMES:\n        raise HTTPException(status_code=400, detail=\"wrong model name, choose between {}\".format(MODEL_NAMES))\n\n    # Get the model from the list of available models\n    model = MODELS.get(model_name)\n\n    # Get & Decode image\n    try:\n        image = inputs.image.encode(\"utf-8\")\n        image = base64.b64decode(image)\n        image = Image.open(io.BytesIO(image))\n    except:\n        raise HTTPException(status_code=400, detail=\"File is not an image\")\n    # Convert from RGBA to RGB *to avoid alpha channels*\n    if image.mode == \"RGBA\":\n        image = image.convert(\"RGB\")\n\n    # Inference\n\n    # RUN THE PREDICTION, TIME IT\n    predictions = ...\n\n    # Post processing\n    classes = predictions.names\n    predictions = predictions.xyxy[0].numpy()\n\n    # Create a list of [DETECTIONS] objects that match the detection class above, using the parse_predictions method\n    detections = ...\n\n    result = Result(detections=..., time=..., model=...)\n\n    return result\n

    As a first step, you can fill in the description of the "routes" (i.e. the functions of the application):

    @app.get(\"/\", description=\"return the title\", response_description=\"FILL ME\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/health\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\"/models\", description=\"FILL ME\", response_description=\"FILL ME\", response_model=List[str])\ndef models() -> [str]:\n    return MODEL_NAMES\n

    There are two functions to complete, taking inspiration from the inference.ipynb notebook. Thanks to Python type hints, you know the input and output types of both functions.

    The first one takes an array of (left, top, right, bottom, confidence, class_index) rows and a list of class names, and creates a list of Detection objects (see the code above for how Detection objects are built).
    # !!!! FILL ME\ndef parse_predictions(predictions: np.ndarray, classes: [str]) -> List[Detection]:\n    raise NotImplementedError\n
    Hint
    def parse_prediction(prediction: np.ndarray, classes: [str]) -> Detection:\nx0, y0, x1, y1, cnf, cls = prediction\ndetection = Detection(\n    x_min=int(x0),\n    y_min=int(y0),\n    x_max=int(x1),\n    y_max=int(y1),\n    confidence=round(float(cnf), 3),\n    class_name=classes[int(cls)],\n)\nreturn detection\n

    The second function must load a model from torchhub given its name (see the docker image).

    # !!!! FILL ME\ndef load_model(model_name: str):\n    \"\"\"\"\"\"\n    raise NotImplementedError\n
    Hint
    def load_model(model_name: str) -> Dict:\n    # Load model from torch\n    model = torch.hub.load(\"ultralytics/yolov5\", model_name, pretrained=True)\n    # Evaluation mode + Non maximum threshold\n    model = model.eval()\n\nreturn model\n

    Then you can run the model loading functions, for example:

    # !!!! FILL ME\n# This is a dictionnary that must contains a model for each key (model names), fill load model\n# example: for model_name in MODEL_NAMES: MODELS[model_name] = load_model(model_name)\n# You can also lazily load models only when they are called to avoid holding 3 models in memory\nMODELS = {}\nfor model_name in MODEL_NAMES:\n    MODELS[model_name] = load_model(model_name)\n

    Finally, write the code that runs a prediction on a PIL image and measures the prediction time (hint: import time and t0 = time.time() ...).

    # RUN THE PREDICTION, TIME IT\npredictions = ...\n# Post processing\nclasses = predictions.names\npredictions = predictions.xyxy[0].numpy()\n

    The result of predictions is a numpy array whose columns are left, top, right, bottom, confidence, class_index.

    These predictions then have to be turned into a list of Detection objects:

    class Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n
    # Create a list of [DETECTIONS] objects that match the detection class above, using the parse_predictions method\ndetections = parse_predictions(predictions, classes)\n
    Hint
    # Inference\nt0 = time.time()\npredictions = model(image, size=640)  # includes NMS\nt1 = time.time()\nclasses = predictions.names\n\n# Post processing\npredictions = predictions.xyxy[0].numpy()\ndetections = [parse_prediction(prediction=pred, classes=classes) for pred in predictions]\n\nresult = Result(detections=detections, time=round(t1 - t0, 3), model=model_name)\n
    "},{"location":"1_8_deployment_tp_long.html#correction","title":"Correction","text":"

    app.py

    Hint
    import base64\nimport io\nimport time\nfrom typing import List, Dict\n\nimport numpy as np\nimport torch\nfrom PIL import Image\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\n\n\nclass Input(BaseModel):\n    model: str\n    image: str\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\ndef parse_prediction(prediction: np.ndarray, classes: [str]) -> Detection:\n    x0, y0, x1, y1, cnf, cls = prediction\n    detection = Detection(\n        x_min=int(x0),\n        y_min=int(y0),\n        x_max=int(x1),\n        y_max=int(y1),\n        confidence=round(float(cnf), 3),\n        class_name=classes[int(cls)],\n    )\n    return detection\n\n\ndef load_model(model_name: str) -> Dict:\n    # Load model from torch\n    model = torch.hub.load(\"ultralytics/yolov5\", model_name, pretrained=True)\n    # Evaluation mode + Non maximum threshold\n    model = model.eval()\n\n    return model\n\n\n# %%\napp = FastAPI(\n    title=\"YOLO-V5 WebApp created with FastAPI\",\n    description=\"\"\"\n                Wraps 3 different yolo-v5 models under the same RESTful API\n                \"\"\",\n    version=\"1.1\",\n)\n\n# %%\nMODEL_NAMES = [\"yolov5s\", \"yolov5m\", \"yolov5l\"]\nMODELS = {}\n\n\n@app.get(\"/\", description=\"return the title\", response_description=\"title\", response_model=str)\ndef root() -> str:\n    return app.title\n\n\n@app.get(\"/describe\", description=\"return the description\", response_description=\"description\", response_model=str)\ndef describe() -> str:\n    return app.description\n\n\n@app.get(\"/version\", description=\"return the version\", response_description=\"version\", response_model=str)\ndef describe() -> str:\n    return app.version\n\n\n@app.get(\"/health\", description=\"return whether it's alive\", response_description=\"alive\", response_model=str)\ndef health() -> str:\n    return \"HEALTH OK\"\n\n\n@app.get(\n    \"/models\",\n    description=\"Query the list of models\",\n    response_description=\"A list of available models\",\n    response_model=List[str],\n)\ndef models() -> [str]:\n    return MODEL_NAMES\n\n\n@app.post(\n    \"/predict\",\n    description=\"Send a base64 encoded image + the model name, get detections\",\n    response_description=\"Detections + Processing time\",\n    response_model=Result,\n)\ndef predict(inputs: Input) -> Result:\n    global MODELS\n\n    # get correct model\n    model_name = inputs.model\n\n    if model_name not in MODEL_NAMES:\n        raise HTTPException(status_code=400, detail=\"wrong model name, choose between {}\".format(MODEL_NAMES))\n\n    # check load\n    if MODELS.get(model_name) is None:\n        MODELS[model_name] = load_model(model_name)\n\n    model = MODELS.get(model_name)\n\n    # Get Image\n    # Decode image\n    try:\n        image = inputs.image.encode(\"utf-8\")\n        image = base64.b64decode(image)\n        image = Image.open(io.BytesIO(image))\n    except:\n        raise HTTPException(status_code=400, detail=\"File is not an image\")\n    # Convert from RGBA to RGB *to avoid alpha channels*\n    if image.mode == \"RGBA\":\n        image = image.convert(\"RGB\")\n\n    # Inference\n    t0 = time.time()\n    predictions = model(image, size=640)  # includes NMS\n    t1 = time.time()\n    classes = predictions.names\n\n    # Post processing\n    predictions = 
predictions.xyxy[0].numpy()\n    detections = [parse_prediction(prediction=pred, classes=classes) for pred in predictions]\n\n    result = Result(detections=detections, time=round(t1 - t0, 3), model=model_name)\n\n    return result\n
    "},{"location":"1_8_deployment_tp_long.html#construire-le-docker","title":"Construire le docker","text":"
    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker build -t eu.gcr.io/${PROJECT_ID}/{your-name}-{your app name}:{your version} -f Dockerfile .\n
    "},{"location":"1_8_deployment_tp_long.html#tester-le-docker","title":"Tester le docker","text":"

    You can run the docker image locally and test it with the notebook.

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker run --rm -p 8000:8000 eu.gcr.io/${PROJECT_ID}/{your-name}-{your app name}:{your version}\n

    You can connect to your app through its public ip on port 8000 from your local browser:

    http://{ip}:8000

    Try a few routes:

    /models /docs
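
    For example, from a Python shell (a sketch assuming the container runs locally on port 8000):

    import requests\n\nURL = \"http://localhost:8000\"\nprint(requests.get(\"{}/health\".format(URL)).text)    # \"HEALTH OK\"\nprint(requests.get(\"{}/models\".format(URL)).json())  # [\"yolov5s\", \"yolov5m\", \"yolov5l\"]\n# the interactive documentation is served on the /docs route, to be opened in a browser\n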

    "},{"location":"1_8_deployment_tp_long.html#pusher-le-docker-sur-google-container-registry","title":"Pusher le docker sur google container registry","text":"
    gcloud auth configure-docker\ndocker push eu.gcr.io/${PROJECT_ID}/{your-name}-model:{your version}\n

    If you need to update the docker image, you must increment the version for the deployment.

    "},{"location":"1_8_deployment_tp_long.html#liens-utiles","title":"Liens Utiles","text":"
    • https://fastapi.tiangolo.com/
    • https://requests.readthedocs.io/en/master/
    • https://testdriven.io/blog/fastapi-streamlit/
    "},{"location":"1_8_deployment_tp_long.html#2-making-a-companion-application","title":"2 - Making a companion application","text":"

    Go to the streamlit folder.

    "},{"location":"1_8_deployment_tp_long.html#objectif_1","title":"Objectif","text":"

    Create a "companion" application that makes it easy to send requests to a model and to visualize the results.

    "},{"location":"1_8_deployment_tp_long.html#deroulement_1","title":"D\u00e9roulement","text":"
    • Fill in app.stub.py and rename it to app.py, completing the right fields (use the notebooks in app/ for help) and creating nice visualizations
    • Package the application as a docker image
    • Test the docker image locally
    • Upload the docker image to Google Container Registry
    "},{"location":"1_8_deployment_tp_long.html#guide-de-developpement","title":"Guide de d\u00e9veloppement","text":"

    Let's look at APP.md

    • Fill in the file with the description of your application

    Let's look at app.stub.py

    import requests\nimport streamlit as st\nfrom PIL import Image\nimport io\nimport base64\nfrom pydantic import BaseModel\nfrom typing import List\nimport random\n\n# ---- Functions ---\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n@st.cache(show_spinner=True)\ndef make_dummy_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    This simulates a fake answer for you to test your application without having access to any other input from other teams\n    \"\"\"\n    # We do a dummy encode and decode pass to check that the file is correct\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n    # We do a dummy decode\n    _image = data.get(\"image\")\n    _image = _image.encode(\"utf-8\")\n    _image = base64.b64decode(_image)\n    _image = Image.open(io.BytesIO(_image))  # type: Image\n    if _image.mode == \"RGBA\":\n        _image = _image.convert(\"RGB\")\n\n    _model = data.get(\"model\")\n\n    # We generate a random prediction\n    w, h = _image.size\n\n    detections = [\n        Detection(\n            x_min=random.randint(0, w // 2 - 1),\n            y_min=random.randint(0, h // 2 - 1),\n            x_max=random.randint(w // w, w - 1),\n            y_max=random.randint(h // 2, h - 1),\n            class_name=\"dummy\",\n            confidence=round(random.random(), 3),\n        )\n        for _ in range(random.randint(1, 10))\n    ]\n\n    # We return the result\n    result = Result(time=0.1, model=_model, detections=detections)\n\n    return result\n\n\n@st.cache(show_spinner=True)\ndef make_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    Process our data and send a proper request\n    \"\"\"\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n        response = requests.post(\"{}/predict\".format(model_url), json=data)\n\n    if not response.status_code == 200:\n        raise ValueError(\"Error in processing payload, {}\".format(response.text))\n\n    response = response.json()\n\n    return Result.parse_obj(response)\n\n\n# ---- Streamlit App ---\n\nst.title(\"NAME ME BECAUSE I AM AWESOME\")\n\nwith open(\"APP.md\") as f:\n    st.markdown(f.read())\n\n# --- Sidebar ---\n# defines an h1 header\n\nmodel_url = st.sidebar.text_input(label=\"Cluster URL\", value=\"http://localhost:8000\")\n\n_model_url = model_url.strip(\"/\")\n\nif st.sidebar.button(\"Send 'is alive' to IP\"):\n    try:\n        response = requests.get(\"{}/health\".format(_model_url))\n        if response.status_code == 200:\n            st.sidebar.success(\"Webapp responding at {}\".format(_model_url))\n        else:\n            st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n    except ConnectionError:\n        st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n\ntest_mode_on = st.sidebar.checkbox(label=\"Test Mode - Generate dummy answer\", value=False)\n\n# --- Main window\n\nst.markdown(\"## Inputs\")\nst.markdown(\"Describe something... 
You can also add things like confidence slider etc...\")\n\n# Here we should be able to choose between [\"yolov5s\", \"yolov5m\", \"yolov5l\"], perhaps a radio button with the three choices ?\nmodel_name = ...\n\n# Here we should be able to upload a file (our image)\nimage_file = ...\n\n# Converting image, this is done for you :)\nif image_file is not None:\n    image_file.seek(0)\n    image = image_file.read()\n    image = Image.open(io.BytesIO(image))\n\nif st.button(label=\"SEND PAYLOAD\"):\n\n    if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = ...  # call the proper function\n    else:\n        result = ...  # call the proper function\n\n    st.balloons()\n\n    st.markdown(\"## Display\")\n\n    st.markdown(\"Make something pretty, draw polygons and confidence..., here's an ugly output\")\n\n    st.image(image, width=512, caption=\"Uploaded Image\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    for detection in result.detections:\n        st.json(detection.json())\n

    Most of the request functions are already implemented; what remains is the user input handling and the visualization.

    • Input: use st.radio and st.file_uploader:

    https://docs.streamlit.io/en/stable/getting_started.html

    https://docs.streamlit.io/en/stable/api.html#streamlit.radio

    https://docs.streamlit.io/en/stable/api.html#streamlit.file_uploader

    st.markdown(\"## Inputs\")\nst.markdown(\"Select your model (Small, Medium or Large)\")\n\nmodel_name = st.radio(label=\"Model Name\", options=[\"yolov5s\", \"yolov5m\", \"yolov5l\"])\n\nst.markdown(\"Upload an image\")\n\nimage_file = st.file_uploader(label=\"Image File\", type=[\"png\", \"jpg\", \"tif\"])\n
    • Visualizations

    Example code that mimics the prediction notebook to draw on a PIL image:

    def draw_preds(image: Image, detections: [Detection]):\n\n    class_names = list(set([detection.class_name for detection in detections]))\n\n    image_with_preds = image.copy()\n\n    # Define colors\n    colors = plt.cm.get_cmap(\"viridis\", len(class_names)).colors\n    colors = (colors[:, :3] * 255.0).astype(np.uint8)\n\n    # Define font\n    font = list(Path(\"/usr/share/fonts\").glob(\"**/*.ttf\"))[0].name\n    font = ImageFont.truetype(font=font, size=np.floor(3e-2 * image_with_preds.size[1] + 0.5).astype(\"int32\"))\n    thickness = (image_with_preds.size[0] + image_with_preds.size[1]) // 300\n\n    # Draw detections\n    for detection in detections:\n        left, top, right, bottom = detection.x_min, detection.y_min, detection.x_max, detection.y_max\n        score = float(detection.confidence)\n        predicted_class = detection.class_name\n        class_idx = class_names.index(predicted_class)\n\n        label = \"{} {:.2f}\".format(predicted_class, score)\n\n        draw = ImageDraw.Draw(image_with_preds)\n        label_size = draw.textsize(label, font)\n\n        top = max(0, np.floor(top + 0.5).astype(\"int32\"))\n        left = max(0, np.floor(left + 0.5).astype(\"int32\"))\n        bottom = min(image_with_preds.size[1], np.floor(bottom + 0.5).astype(\"int32\"))\n        right = min(image_with_preds.size[0], np.floor(right + 0.5).astype(\"int32\"))\n\n        if top - label_size[1] >= 0:\n            text_origin = np.array([left, top - label_size[1]])\n        else:\n            text_origin = np.array([left, top + 1])\n\n        # My kingdom for a good redistributable image drawing library.\n        for r in range(thickness):\n            draw.rectangle([left + r, top + r, right - r, bottom - r], outline=tuple(colors[class_idx]))\n        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=tuple(colors[class_idx]))\n\n        if any(colors[class_idx] > 128):\n            fill = (0, 0, 0)\n        else:\n            fill = (255, 255, 255)\n\n        draw.text(text_origin, label, fill=fill, font=font)\n\n        del draw\n\n    return image_with_preds\n

    Usage (example):

        if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = ...  # call the proper function\n    else:\n        result = ...  # call the proper function\n\n    st.balloons()\n\n    st.markdown(\"## Display\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    image_with_preds = draw_preds(image, result.detections)\n    st.image(image_with_preds, width=1024, caption=\"Image with detections\")\n\n    st.markdown(\"### Detection dump\")\n    for detection in result.detections:\n        st.json(detection.json())\n
    "},{"location":"1_8_deployment_tp_long.html#corection-apppy","title":"Corection app.py","text":"Hint
    import base64\nimport io\nimport random\nfrom pathlib import Path\nfrom typing import List\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport requests\nimport streamlit as st\nfrom PIL import Image\nfrom PIL import ImageDraw, ImageFont\nfrom pydantic import BaseModel\n\n# ---- Functions ---\n\n\nclass Detection(BaseModel):\n    x_min: int\n    y_min: int\n    x_max: int\n    y_max: int\n    class_name: str\n    confidence: float\n\n\nclass Result(BaseModel):\n    detections: List[Detection] = []\n    time: float = 0.0\n    model: str\n\n\n@st.cache(show_spinner=True)\ndef make_dummy_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    This simulates a fake answer for you to test your application without having access to any other input from other teams\n    \"\"\"\n    # We do a dummy encode and decode pass to check that the file is correct\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n    # We do a dummy decode\n    _image = data.get(\"image\")\n    _image = _image.encode(\"utf-8\")\n    _image = base64.b64decode(_image)\n    _image = Image.open(io.BytesIO(_image))  # type: Image\n    if _image.mode == \"RGBA\":\n        _image = _image.convert(\"RGB\")\n\n    _model = data.get(\"model\")\n\n    # We generate a random prediction\n    w, h = _image.size\n\n    detections = [\n        Detection(\n            x_min=random.randint(0, w // 2 - 1),\n            y_min=random.randint(0, h // 2 - 1),\n            x_max=random.randint(w // w, w - 1),\n            y_max=random.randint(h // 2, h - 1),\n            class_name=\"dummy\",\n            confidence=round(random.random(), 3),\n        )\n        for _ in range(random.randint(1, 10))\n    ]\n\n    # We return the result\n    result = Result(time=0.1, model=_model, detections=detections)\n\n    return result\n\n\n@st.cache(show_spinner=True)\ndef make_request(model_url: str, model: str, image: Image) -> Result:\n    \"\"\"\n    Process our data and send a proper request\n    \"\"\"\n    with io.BytesIO() as buffer:\n        image.save(buffer, format=\"PNG\")\n        buffer: str = base64.b64encode(buffer.getvalue()).decode(\"utf-8\")\n        data = {\"model\": model, \"image\": buffer}\n\n        response = requests.post(\"{}/predict\".format(model_url), json=data)\n\n    if not response.status_code == 200:\n        raise ValueError(\"Error in processing payload, {}\".format(response.text))\n\n    response = response.json()\n\n    return Result.parse_obj(response)\n\n\ndef draw_preds(image: Image, detections: [Detection]):\n\n    class_names = list(set([detection.class_name for detection in detections]))\n\n    image_with_preds = image.copy()\n\n    # Define colors\n    colors = plt.cm.get_cmap(\"viridis\", len(class_names)).colors\n    colors = (colors[:, :3] * 255.0).astype(np.uint8)\n\n    # Define font\n    font = list(Path(\"/usr/share/fonts\").glob(\"**/*.ttf\"))[0].name\n    font = ImageFont.truetype(font=font, size=np.floor(3e-2 * image_with_preds.size[1] + 0.5).astype(\"int32\"))\n    thickness = (image_with_preds.size[0] + image_with_preds.size[1]) // 300\n\n    # Draw detections\n    for detection in detections:\n        left, top, right, bottom = detection.x_min, detection.y_min, detection.x_max, detection.y_max\n        score = float(detection.confidence)\n        predicted_class = detection.class_name\n        class_idx = 
class_names.index(predicted_class)\n\n        label = \"{} {:.2f}\".format(predicted_class, score)\n\n        draw = ImageDraw.Draw(image_with_preds)\n        label_size = draw.textsize(label, font)\n\n        top = max(0, np.floor(top + 0.5).astype(\"int32\"))\n        left = max(0, np.floor(left + 0.5).astype(\"int32\"))\n        bottom = min(image_with_preds.size[1], np.floor(bottom + 0.5).astype(\"int32\"))\n        right = min(image_with_preds.size[0], np.floor(right + 0.5).astype(\"int32\"))\n\n        if top - label_size[1] >= 0:\n            text_origin = np.array([left, top - label_size[1]])\n        else:\n            text_origin = np.array([left, top + 1])\n\n        # My kingdom for a good redistributable image drawing library.\n        for r in range(thickness):\n            draw.rectangle([left + r, top + r, right - r, bottom - r], outline=tuple(colors[class_idx]))\n        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=tuple(colors[class_idx]))\n\n        if any(colors[class_idx] > 128):\n            fill = (0, 0, 0)\n        else:\n            fill = (255, 255, 255)\n\n        draw.text(text_origin, label, fill=fill, font=font)\n\n        del draw\n\n    return image_with_preds\n\n\n# ---- Streamlit App ---\n\nst.title(\"Yolo v5 Companion App\")\n\nst.markdown(\n    \"A super nice companion application to send requests and parse results\\n\"\n    \"We wrap https://pytorch.org/hub/ultralytics_yolov5/\"\n)\n\n# ---- Sidebar ----\n\ntest_mode_on = st.sidebar.checkbox(label=\"Test Mode - Generate dummy answer\", value=False)\n\nst.sidebar.markdown(\"Enter the cluster URL\")\nmodel_url = st.sidebar.text_input(label=\"Cluster URL\", value=\"http://localhost:8000\")\n\n_model_url = model_url.strip(\"/\")\n\nif st.sidebar.button(\"Send 'is alive' to IP\"):\n    try:\n        health = requests.get(\"{}/health\".format(_model_url))\n        title = requests.get(\"{}/\".format(_model_url))\n        version = requests.get(\"{}/version\".format(_model_url))\n        describe = requests.get(\"{}/describe\".format(_model_url))\n\n        if health.status_code == 200:\n            st.sidebar.success(\"Webapp responding at {}\".format(_model_url))\n            st.sidebar.json({\"title\": title.text, \"version\": version.text, \"description\": describe.text})\n        else:\n            st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n    except ConnectionError:\n        st.sidebar.error(\"Webapp not respond at {}, check url\".format(_model_url))\n\n\n# ---- Main window ----\n\nst.markdown(\"## Inputs\")\nst.markdown(\"Select your model (Small, Medium or Large)\")\n\n# Data input\nmodel_name = st.radio(label=\"Model Name\", options=[\"yolov5s\", \"yolov5m\", \"yolov5l\"])\n\nst.markdown(\"Upload an image\")\n\nimage_file = st.file_uploader(label=\"Image File\", type=[\"png\", \"jpg\", \"tif\"])\n\nconfidence_threshold = st.slider(label=\"Confidence filter\", min_value=0.0, max_value=1.0, value=0.0, step=0.05)\n\n# UploadFile to PIL Image\nif image_file is not None:\n    image_file.seek(0)\n    image = image_file.read()\n    image = Image.open(io.BytesIO(image))\n\nst.markdown(\"Send the payload to {}/predict\".format(_model_url))\n\n# Send payload\nif st.button(label=\"SEND PAYLOAD\"):\n    if test_mode_on:\n        st.warning(\"Simulating a dummy request to {}\".format(model_url))\n        result = make_dummy_request(model_url=_model_url, model=model_name, image=image)\n    else:\n        result = make_request(model_url=_model_url, 
model=model_name, image=image)\n\n    st.balloons()\n\n    # Display results\n    st.markdown(\"## Display\")\n\n    st.text(\"Model : {}\".format(result.model))\n    st.text(\"Processing time : {}s\".format(result.time))\n\n    detections = [detection for detection in result.detections if detection.confidence > confidence_threshold]\n\n    image_with_preds = draw_preds(image, detections)\n    st.image(image_with_preds, width=1024, caption=\"Image with detections\")\n\n    st.markdown(\"### Detection dump\")\n    for detection in result.detections:\n        st.json(detection.json())\n

    Note

    The test mode was used in a previous BE. If you did everything in order, you should not need it.

    "},{"location":"1_8_deployment_tp_long.html#construire-le-docker_1","title":"Construire le docker","text":"
    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker build -t eu.gcr.io/${PROJECT_ID}/{your app name}:{your version} -f Dockerfile .\n
    "},{"location":"1_8_deployment_tp_long.html#tester-le-docker_1","title":"Tester le docker","text":"

    Warning

    Unfortunately, this does not seem to work on github codespaces. We will have to assume that it works on the first try! The safest approach is therefore to make sure that your app.py matches the correction, then move on to the next section.

    Instead of running streamlit run app.py, you can launch the docker image locally and go to {ip}:8501 to test it:

    PROJECT_ID=$(gcloud config get-value project 2> /dev/null)\ndocker run --rm -p 8501:8501 eu.gcr.io/${PROJECT_ID}/{your app name}:{your version}\n

    You can then browse to the machine's ip on port 8501.

    Enter the machine's ip and port 8000 in the field on the left.

    "},{"location":"1_8_deployment_tp_long.html#pousser-le-docker-sur-google-container-registry","title":"Pousser le docker sur google container registry","text":"
    gcloud auth configure-docker\ndocker push eu.gcr.io/${PROJECT_ID}/{your-name}-frontend:{your version}\n
    "},{"location":"1_8_deployment_tp_long.html#liens-utiles_1","title":"Liens Utiles","text":"
    • Doc Streamlit
    "},{"location":"1_8_deployment_tp_long.html#4-deployer-le-modele-et-lux-sur-linstance-gcp","title":"4 - D\u00e9ployer le mod\u00e8le et l'UX sur l'instance GCP","text":"

    We will create a virtual machine in which we will launch the two containers.

    "},{"location":"1_8_deployment_tp_long.html#41-creation-de-la-vm","title":"4.1 Cr\u00e9ation de la VM","text":"

    We will directly create a machine with the model container already running.

    Let's start by creating a properly configured GCP instance to connect to:

    Don't forget to rename your instance.

    export INSTANCE_NAME=\"tp-deployment-{yourgroup}-{yourname}\" # Don't forget to replace values !\n
    gcloud compute instances create $INSTANCE_NAME \\\n        --zone=\"europe-west1-b\" \\\n        --machine-type=\"n1-standard-2\" \\\n        --image-family=\"common-cpu\" \\\n        --image-project=\"deeplearning-platform-release\" \\\n        --maintenance-policy=TERMINATE \\\n        --scopes=\"storage-rw\" \\\n        --boot-disk-size=75GB\n

    Retrieve the public ip of the machine (through the google cloud interface, or by running gcloud compute instances list | grep {your instance}) and write it down.

    From the github codespace, connect to the machine:

        gcloud compute ssh {user}@{instance}\n
    "},{"location":"1_8_deployment_tp_long.html#42-execution-des-containers","title":"4.2 Execution des containers","text":"

    Hint

    To be run inside the GCP VM

    We will use docker compose to launch the two applications simultaneously so that they can communicate.

    More info on docker compose

    • Stop all running containers, etc.
    • Create a docker-compose.yml file

    On your codespace, create this file and change the image names to the ones you used (respectively model and frontend):

    version: '3'\nservices:\n  yolo:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5:1.2\"\n    ports:\n      - \"8000:8000\"\n    hostname: yolo\n  streamlit:\n    image: \"eu.gcr.io/third-ridge-138414/yolo-v5-streamlit:1.2\"\n    ports:\n      - \"8501:8501\"\n    hostname: streamlit\n

    Then copy this text onto the VM into a file named docker-compose.yml (for example, using nano).

    Note that we declare 2 services:

    • a "yolo" service
    • a "streamlit" service

    We also declare the open ports of each application.

    Now... how do we launch the two applications?

    docker-compose up in the folder where your docker-compose.yml is located

    Hint

    If docker-compose is not available, sudo apt -y install docker-compose

    Normally:

    • the model service is reachable on port 8000 of the machine
    • the streamlit service is reachable on port 8501 of the machine
    • you must use the hostname "yolo" for streamlit to talk to the model: the services reach each other through a special "local" network shared by all containers launched via docker-compose

    "},{"location":"1_8_deployment_tp_long.html#acces-a-la-vm","title":"Acc\u00e8s \u00e0 la VM","text":"

    Hint

    This is likely to work only over 4G.

    Connect to the public IP of the machine from your web browser, on port 8501: http://{machine-ip}:8501

    You should be able to access your deployment!

    "},{"location":"1_8_deployment_tp_long.html#conclusion","title":"Conclusion","text":"

    \ud83c\udf89 Congratulations! \ud83c\udf89

    You have deployed your first model in production!

    "},{"location":"2_1_overview.html","title":"Introduction to Data Distribution","text":""},{"location":"2_1_overview.html#course-overview","title":"Course Overview","text":"
    • Data Distribution & Big Data Processing

    Harnessing the complexity of large amounts of data is a challenge in itself.

    But Big Data processing is more than that: originally characterized by the 3 Vs of Volume, Velocity and Variety, the concepts popularized by Hadoop and Google require dedicated computing solutions (both software and infrastructure), which will be explored in this module.

    "},{"location":"2_1_overview.html#objectives","title":"Objectives","text":"

    By the end of this module, participants will be able to:

    • Understand the differences and usage between main distributed computing architectures (HPC, Big Data, Cloud, CPU vs GPGPU)
    • Implement the distribution of simple operations via the Map/Reduce principle in PySpark
    • Understand the principle of Kubernetes
    • Deploy a Big Data Processing Platform on the Cloud
    • Implement the distribution of data wrangling/cleaning and training machine learning algorithms using PyData stack, Jupyter notebooks and Dask
    "},{"location":"2_2_orchestration.html","title":"Intro to Orchestration and Kubernetes","text":""},{"location":"2_2_orchestration.html#intro-to-orchestration","title":"Intro to Orchestration","text":"

    Link to slides

    "},{"location":"2_2_orchestration.html#intro-to-kubernetes","title":"Intro to Kubernetes","text":"

    Link to slides

    "},{"location":"2_3_kub_handson.html","title":"Kubernetes: Zero to Jupyterhub using Google Kubernetes Engine","text":""},{"location":"2_3_kub_handson.html#what-is-jupyterhub","title":"What is JupyterHub","text":"

    JupyterHub brings the power of notebooks to groups of users. It gives users access to computational environments and resources without burdening the users with installation and maintenance tasks. Users - including students, researchers, and data scientists - can get their work done in their own workspaces on shared resources which can be managed efficiently by system administrators.

    JupyterHub runs in the cloud or on your own hardware, and makes it possible to serve a pre-configured data science environment to any user in the world. It is customizable and scalable, and is suitable for small and large teams, academic courses, and large-scale infrastructure. Key features of JupyterHub:

    • Customizable - JupyterHub can be used to serve a variety of environments. It supports dozens of kernels with the Jupyter server, and can be used to serve a variety of user interfaces including the Jupyter Notebook, Jupyter Lab, RStudio, nteract, and more.

    • Flexible - JupyterHub can be configured with authentication in order to provide access to a subset of users. Authentication is pluggable, supporting a number of authentication protocols (such as OAuth and GitHub).

    • Scalable - JupyterHub is container-friendly, and can be deployed with modern-day container technology. It also runs on Kubernetes, and can run with up to tens of thousands of users.

    • Portable - JupyterHub is entirely open-source and designed to be run on a variety of infrastructure. This includes commercial cloud providers, virtual machines, or even your own laptop hardware.

    The foundational JupyterHub code and technology can be found in the JupyterHub repository. This repository and the JupyterHub documentation contain more information about the internals of JupyterHub, its customization, and its configuration.

    "},{"location":"2_3_kub_handson.html#zero-to-jupyterhub-using-kubernetes","title":"Zero to Jupyterhub using Kubernetes","text":"

    JupyterHub allows users to interact with a computing environment through a webpage. As most devices have access to a web browser, JupyterHub makes it easy to provide and standardize the computing environment for a group of people (e.g., for a class of students or an analytics team).

    This project will help you set up your own JupyterHub on a cloud and leverage the cloud's scalable nature to support large groups of users. Thanks to Kubernetes, we are not tied to a specific cloud provider.

    "},{"location":"2_3_kub_handson.html#instructions","title":"Instructions","text":"
    • Go here and follow the instructions

    • Use Google Kubernetes Engine to setup your cluster

    Info

    You will use the same method later in the year to set up a Dask Kubernetes cluster using Helm

    • Give some people the public IP of your cluster so that they can connect to it... try to make it scale!
    "},{"location":"2_4_functional.html","title":"Functional Programming","text":"

    This section of the course is not given this year.

    "},{"location":"2_4_functional.html#functional-programming-for-distributed-data","title":"Functional Programming for Distributed Data","text":"

    Link to slides

    "},{"location":"2_4_functional.html#introduction-to-julia","title":"Introduction to Julia","text":"

    For the first exercise, you'll need to install Julia and IJulia locally or make a working Julia Colab Notebook. While Colab is sufficient for today's exercises, a local installation is recommended:

    • Julia download
    • Julia kernel for Jupyter

    Here is a Colab template from this Github repository which will install the Julia kernel for a single Colab instance.

    Once you have a Julia Jupyter kernel, follow this Julia for Pythonistas notebook.

    Github Colab

    "},{"location":"2_4_functional.html#functional-programming-in-julia","title":"Functional Programming in Julia","text":"

    Julia documentation explaining the following (a short Python analogue of the first two points is sketched after the list):

    • Functions, showing that they are first-class
    • the map function which is a higher-order function
    • distributed computing allowing for transfer of functions between threads or workers
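
    For readers coming from Python, here is a small, hedged Python analogue of the first two points. This is not the Julia exercise itself, just the same ideas in a familiar language:

    # Python analogue (hedged illustration): first-class functions and the higher-order map\ndef square(x):\n    return x * x\n\ndef apply_twice(f, x):  # functions are ordinary values that can be passed around\n    return f(f(x))\n\nprint(apply_twice(square, 3))        # 81\nprint(list(map(square, [1, 2, 3])))  # -> [1, 4, 9]\n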
    "},{"location":"2_4_functional.html#distributed-data-in-julia","title":"Distributed Data in Julia","text":"

    Julia's base language supports distributed computation, but there are a few packages which facilitate data processing tasks over distributed data:

    • DistributedArrays - A general Array type which can be distributed over multiple workers.
    • JuliaDB - A data structuring package which automatically handles distributed data storage and computation
    • Spark.jl - A Julia interface to Apache Spark. Related blog post.
    "},{"location":"2_4_functional.html#map-reduce-exercise","title":"Map Reduce Exercise","text":"

    The second part of this class is an interactive notebook in the Julia language covering the MapReduce programming framework, from simple addition queries to a grep example.

    MapReduce notebook

    MapReduce notebook on Colab (requires adding Julia kernel installation)

    "},{"location":"2_5_mapreduce.html","title":"Hadoop and MapReduce","text":"

    In this class, we start with an overview of the Big Data ecosystem, contextualizing Hadoop, NoSQL databases, and Business Intelligence tools. We then cover Hadoop and HDFS in detail with a simple MapReduce example.

    Slides

    • Introduction to Big Data and its ecosystem (1h)
      • What is Big Data?
      • Legacy “Big Data” ecosystem
      • Big Data use cases
      • Big Data to Machine Learning
    • Big Data platforms, Hadoop & Beyond (2h)
      • Hadoop, HDFS and MapReduce
      • Datalakes, Data Pipelines
      • From HPC to Big Data to Cloud and High Performance Data Analytics
      • BI vs Big Data
      • Hadoop legacy: Spark, Dask, Object Storage ...

    It also contains a short interactive exercise using Python MapReduce; a minimal sketch of the idea is given below.
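
    As a reminder of the idea, here is a minimal, hedged Python sketch of map/reduce-style word counting on a single machine (no Hadoop required; the in-class exercise may differ):

    # Hedged sketch: word counting in the map/reduce style, on a single machine\nfrom collections import Counter\nfrom functools import reduce\n\nlines = [\"big data\", \"data engineering\", \"big compute\"]\n\n# map phase: turn each line into partial word counts\nmapped = [Counter(line.split()) for line in lines]\n\n# reduce phase: merge the partial counts pairwise\ncounts = reduce(lambda a, b: a + b, mapped, Counter())\nprint(counts)  # e.g. Counter({'big': 2, 'data': 2, 'engineering': 1, 'compute': 1})\n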

    "},{"location":"2_6_spark.html","title":"Spark","text":"

    In this class, we cover the Apache Spark framework, explaining Resilient Distributed Datasets, Spark SQL, Spark MLlib, and how to interact with a Spark cluster. We use PySpark in a Jupyter notebook to explore RDDs and see an example of distributed K-Means.

    Spark introduction

    Spark notebook

    Spark notebook on Colab
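
    If you want a first taste of the RDD API before opening the notebook, here is a minimal, hedged PySpark sketch (assuming pyspark is installed and running locally):

    from pyspark.sql import SparkSession\n\n# Start a local Spark session and get its SparkContext\nspark = SparkSession.builder.master(\"local[*]\").appName(\"rdd-demo\").getOrCreate()\nsc = spark.sparkContext\n\n# Word count with the RDD API: flatMap -> (word, 1) pairs -> reduceByKey\nwords = sc.parallelize([\"big data\", \"data engineering\", \"big compute\"])\ncounts = (words.flatMap(lambda line: line.split())\n               .map(lambda word: (word, 1))\n               .reduceByKey(lambda a, b: a + b))\nprint(counts.collect())\n\nspark.stop()\n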

    "},{"location":"2_7_cloud.html","title":"Evolution of Data Management Systems","text":""},{"location":"2_7_cloud.html#fundamental-concepts-methods-and-applications","title":"Fundamental Concepts, Methods and Applications","text":"

    In this three-part class, students will cover the history of data management systems, from file systems to databases to distributed cloud storage. This class is given over the length of the Data Engineering course. Questions from the first two parts are integrated into the exam on cloud computing, and questions from the Cloud DMS section are integrated into the Dask notebook evaluation.

    "},{"location":"2_7_cloud.html#objectives","title":"Objectives","text":"

    The objectives of this course are to:

    • Introduce the fundamental concepts
    • Concisely describe the main characteristics of the evolution of DMS (Data Management Systems)
    • Highlight targeted application classes

    "},{"location":"2_7_cloud.html#key-words","title":"Key Words","text":"

    Data Management Systems, Uni-processor DBMS, Parallel DBMS, Data Integration Systems, Big Data, Cloud Data Management Systems, High Performance, Scalability, Elasticity, Multi-store/Poly-store Systems

    "},{"location":"2_7_cloud.html#targeted-skills","title":"Targeted Skills","text":"
    • Effectively exploit a DMS according to its environment (uniprocessor, parallel, distributed, cloud) to support decision-making within an organization.
    • Choose, in a relevant way, a DMS across multiple environments so that an organization's applications function optimally.
    "},{"location":"2_7_cloud.html#indicative-program","title":"Indicative Program","text":"
    1. Introduction to Main Problems of Data Management

      • From File Management Systems FMS to Database MS DBMS
      • Motivations, Objectives, Organizations & Drawbacks
      • Databases & Rel. DBMS: Motivations & Objectives
      • Resources:
        • Introduction
        • SGF - File Systems
        • Views - Relational Systems
        • File Organization
    2. Parallel Database Systems

      • Objectives and Parallel Architecture Models
      • Data Partitioning Strategies
      • Parallel Query Processing
      • Resources:
        • Parallel DBMS
        • Parallel Queries
        • Systems DB Parallel
    3. From Distributed DB to Data Integration Systems DIS

      • An Ex. of DDB, Motivations & Objectives
      • Designing of DDB
      • Distributed Query Processing
      • An Ex. of DIS
      • Motivations & Objectives
      • Mediator-Adapters Architecture
      • Design of a Global Schema (GAV, LAV)
      • Query Processing Methodologies
      • Resources:
        • Distributed DBMS - Chapter 1
        • Distributed DBMS - Chapter 2
        • Distributed DBMS - Chapter 3
        • Systems for integrating heterogeneous and distributed data
        • Integration Systems complement
        • Distributed DBMS Dec 2023
    4. Cloud Data Management Systems CDMS

      • Motivations and Objectives
      • Main Characteristics of Big Data and CDMS
      • Classification of Cloud Data Management Systems CDMS
      • Advantages and Weakness of Parallel RDBMS and CDMS
      • Comparison between Parallel RDBMS and CDMS
      • Introduction to Multi-store/Polystore Systems
      • Resources:
        • Cloud Systems
        • MapReduce examples
    5. Conclusion

      • Maturity of Cloud DMS
      • Key Criteria for Choosing a Data Management System
    "},{"location":"2_7_cloud.html#additional-reading","title":"Additional Reading","text":"
    1. Principles of Distributed Database Systems, M. Tamer Ozsu and Patrick Valduriez; Springer-Verlag ; Fourth Edition, December 2019.

    2. Data Management in the Cloud: Challenges and Opportunities; Divyakant Agrawal, Sudipto Das, and Amr El Abbadi; Synthesis Lectures on Data Management, December 2012, Vol. 4, No. 6, Pages 1-138.

    3. Query Processing in Parallel Relational Database Systems; H. Lu, B.-C Ooi and K.-L. Tan; IEEE Computer Society Press, CA, USA, 1994.

    4. Traitement parallèle dans les bases de données relationnelles : concepts, méthodes et applications; Abdelkader Hameurlain, Pierre Bazex, Franck Morvan; Cépaduès Editions, October 1996.

    "},{"location":"2_8_dask.html","title":"Dask on Kubernetes","text":"

    In this class, we focus on getting a Dask cluster running in Kubernetes, which we will then use in the Dask project. Dask is a parallel computing library in Python which integrates well with machine learning tools like scikit-learn.

    This class builds on the orchestration class, going into further detail on K8S specifics.

    Kubernetes

    Dask presentation

    Students will use GCP for this class. Be sure to stop your cluster after class to conserve GCP credits.

    Additional resources can be found in the dask documentation.
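
    If you have never used Dask, here is a minimal, hedged local sketch (no cluster needed, only a pip-installed dask) to recall the programming model before we deploy it on Kubernetes:

    from dask import delayed\n\n@delayed\ndef square(x):\n    return x * x\n\n# Build a small task graph lazily, then run it on the default local scheduler\ntotal = delayed(sum)([square(i) for i in range(10)])\nprint(total.compute())  # 285\n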

    "},{"location":"2_8_dask.html#deploying-a-dask-hub","title":"Deploying a Dask Hub","text":"

    This material is taken from the following docs:

    • https://docs.dask.org/en/latest/setup/kubernetes-helm.html
    • https://zero-to-jupyterhub.readthedocs.io/en/latest/kubernetes/setup-kubernetes.html
    • https://zero-to-jupyterhub.readthedocs.io/en/latest/kubernetes/setup-helm.html
    "},{"location":"2_8_dask.html#creating-a-kubernetes-cluster","title":"Creating a Kubernetes Cluster","text":"

    First, you need to enable the Kubernetes API if not already done:

    • Go to console.cloud.google.com
    • Select the Kubernetes Engine in the menu
    • Enable the API if not already done.

    Then you'll need a terminal with gcloud and kubectl. The simplest is just to use the Google Cloud Shell from console.cloud.google.com. If you prefer, you can follow the links above to find how to install everything on your computer.

    Ask Google Cloud to create a managed Kubernetes cluster and a default node pool to get nodes from:

    gcloud container clusters create \\\n  --machine-type n1-standard-4 \\\n  --enable-autoscaling \\\n  --min-nodes 1 \\\n  --max-nodes 10 \\\n  --num-nodes 1 \\\n  --zone europe-west1-b \\\n  --cluster-version 1.23 \\\n  dask-hub-k8s\n

    This will take a few minutes (maybe 2 or 3). You can check that the cluster was created with:

    gcloud container clusters list\n

    You can then test if the cluster is running:

    kubectl get node\n

    Then get permissions to perform all administrative actions needed.

    ⚠️ Don't forget to replace your email below. ⚠️

    kubectl create clusterrolebinding cluster-admin-binding \\\n  --clusterrole=cluster-admin \\\n  --user=<GOOGLE-EMAIL-ACCOUNT>\n
    "},{"location":"2_8_dask.html#setting-up-helm","title":"Setting up Helm","text":"

    From your Google Cloud Shell or terminal:

    curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash\nhelm list\n

    should return:

    NAME    NAMESPACE       REVISION        UPDATED STATUS  CHART   APP VERSION\n
    "},{"location":"2_8_dask.html#helm-install-a-dask-hub","title":"Helm install a Dask Hub","text":"

    The default Daskhub configuration uses dask-gateway, which handles multiple users with fine-grained authorisations. We don't need this, and it is a more complicated setup than what we'll do.

    Instead, we'll deploy a Daskhub with dask-kubernetes, which assumes some authorisations inside the Pods of the Kubernetes cluster (a potential security risk), but is more straightforward for our usage.

    Verify that you've set up a Kubernetes cluster, then add Dask's helm charts:

    helm repo add dask https://helm.dask.org/\nhelm repo update\n

    Generate a token to configure JupyterHub:

    openssl rand -hex 32  > token1.txt\ncat token1.txt\n

    Create the file below (for example using vim or the Cloud Shell editor) and substitute the <token-1> placeholder with the token you just generated.

    # file: daskhub-config.yaml\njupyterhub:\n  proxy:\n    secretToken: \"<token-1>\"\n  scheduling:\n    podPriority:\n      enabled: true\n    userPlaceholder:\n      # Dummy user pods used as placeholders (here just 1)\n      replicas: 1\n    userScheduler:\n      enabled: true\n  singleuser:\n    serviceAccountName: daskkubernetes\n    image:\n      name: guillaumeeb/pangeo-ml-notebook # Image to use for singleuser environment. Must include dask-kubernetes.\n      tag: 2021.11.14\n\ndask-gateway:\n  enabled: false\n  gateway:\n    auth:\n      type: simple\n      simple:\n        password: \"unused\"\n\ndask-kubernetes:\n  enabled: true\n

    Now we just install Dask Hub:

    helm upgrade --wait --install --render-subchart-notes \\\n    --namespace daskhub \\\n    --create-namespace \\\n    dhub dask/daskhub \\\n    --values=daskhub-config.yaml\n

    This will again take a few minutes. You can check the release with:

    helm list -n daskhub\n

    Check install and go to Jupyter!

    To get the public IP of your hub deployment:

    kubectl --namespace=daskhub get service proxy-public\n

    Get the external IP and open it in your browser. You should be able to log in with any username/password. Check that Dask is working, and the K8S mechanisms too!

    "},{"location":"2_8_dask.html#create-a-dask-kubernetes-cluster","title":"Create a dask-kubernetes cluster","text":"

    Create a yaml file within the Jupyterhub interface:

    # worker-spec.yaml\n\nkind: Pod\nmetadata:\n  labels:\n    foo: bar\nspec:\n  restartPolicy: Never\n  containers:\n  - image: guillaumeeb/pangeo-ml-notebook:2021.11.14\n    imagePullPolicy: IfNotPresent\n    args: [dask-worker, --nthreads, '2', --no-dashboard, --memory-limit, 6GB, --death-timeout, '60']\n    name: dask\n    env:\n      - name: EXTRA_PIP_PACKAGES\n        value: xgboost\n    resources:\n      limits:\n        cpu: \"2\"\n        memory: 6G\n      requests:\n        cpu: \"1.7\"\n        memory: 6G\n

    Just open a notebook in your newly created Dask-enabled hub, and copy and paste the following cells.

    Set some configuration to ease usage:

    import dask\nimport dask.distributed  # populate config with distributed defaults\nimport dask_kubernetes\n\ndask.config.set({\"kubernetes.worker-template-path\": \"worker-spec.yaml\"})\ndask.config.set({\"distributed.dashboard.link\": \"{JUPYTERHUB_SERVICE_PREFIX}proxy/{port}/status\"})\n

    Create a cluster object.

    from dask_kubernetes import KubeCluster\n\ncluster = KubeCluster(deploy_mode='local') # Scheduler is started in the notebook process\ncluster\n

    This should display a fancy widget. You can open the Dask Dashboard from here.

    Now scale the cluster to get Dask workers and connect to it.

    cluster.scale(20)\n
    from distributed import Client\n\nclient = Client(cluster)\nclient\n

    What's happening in your K8S cluster after a few seconds or minutes? Launch some computation: what about estimating Pi?

    We'll use Dask Array, a NumPy extension, for this:

    import dask.array as da\n\nsample = 10_000_000_000  # <- this is huge!\nxxyy = da.random.uniform(-1, 1, size=(2, sample))\nnorm = da.linalg.norm(xxyy, axis=0)\nsumm = da.sum(norm <= 1)\ninsiders = summ.compute()\npi = 4 * insiders / sample\nprint(\"pi ~= {}\".format(pi))\n

    How many workers did you get? Why?
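
    To check programmatically (a small sketch reusing the client created above):

    # Count the workers that actually joined, to compare with the 20 requested\nn_workers = len(client.scheduler_info()[\"workers\"])\nprint(n_workers, \"workers connected\")\n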

    Now just close the cluster.

    cluster.close()\n

    What happens after a few minutes?

    "},{"location":"2_8_dask.html#deleting-a-kubernetes-cluster","title":"Deleting a Kubernetes Cluster","text":"

    Get your cluster name and region

    gcloud container clusters list\n

    Delete your kubernetes cluster

    gcloud container clusters delete <YOUR_CLUSTER_NAME> --region <YOUR_CLUSTER_REGION>\n
    "},{"location":"2_9_project.html","title":"Project - Dask","text":"

    The evaluation for this class is a Dask notebook. You should run this notebook on a Daskhub using Kubernetes, like in the Dask on Kubernetes class. You should complete the exercises and answer the questions in the notebook, then turn it in through the LMS. You should work in a group of 2 to 4 and divide the work equally among responding to questions, managing the infrastructure, and trying out different algorithms. Be sure to include the names of your group members in your submission.

    The notebook is due on March 12, 2024 at 23h59.

    Dask tutorial, if needed

    Evaluation notebook

    LMS depot

    "},{"location":"ctf.html","title":"Data Engineering Fundamentals Capture the Flag","text":"

    This class is a five-day Capture the Flag event to get to know the basics of systems usage, specifically Linux, git, and ssh. There is also a large section on Python, with an emphasis on data science scripting practices using numpy and pandas in Jupyter notebooks.

    This is a self-guided exercise with resources and questions on this site. You, the participant, must look for the answer to the questions through reading documentation, discussing with others, and trying things. Try to avoid searching for answers online in a search engine; the answers can almost always be found in documentation.

    Answers can be submitted through an API with the CTF server. Questions will be made available over the course of 5 sessions. Responding correctly to a question gives 1 point, and an additional 0.5 points are awarded for being the first to submit the correct answer to a question. That half point is the flag - be the first to capture it!

    If you're speeding through the questions, consider helping others learn the material. Depending on your background, you may have varied experience with these tools. Get to know the other participants by helping them capture a flag too.

    "},{"location":"ctf.html#linux","title":"Linux","text":"

    Linux is an open-source operating system based on Unix. It is a standard choice for development and is the most dominant operating system for web servers, cloud computing, and high performance computing at 80% of global public servers. There are many different distributions but they share a common set of tools, notably GNU software. A very common Linux distribution is Android, at 73% of all mobile devices, so you might be a Linux user already without realizing it!

    You most likely don't use Linux as the operating system of your personal computer, however. If you are using one of the 2.5% of personal computers running Linux, you can skip straight to the Submission section.

    MacOS is also based on Unix, so if you're using MacOS, most things should work just as in Linux! A few commands will be different from the course instructions, and the questions will always refer to Linux resources, for example documentation. It is highly recommended to install homebrew (https://brew.sh/) which will allow for package installation via the command line.

    "},{"location":"ctf.html#installation-on-windows","title":"Installation on Windows","text":"

    The easiest way to use Linux on Windows is through the Windows Subsystem for Linux. Installation instructions are here: https://docs.microsoft.com/en-us/windows/wsl/install. Make sure to follow all instructions carefully. If asked to join a \"Windows Insiders Program\", ignore this. By default, this installs Ubuntu, which is good for this systems class and for all of SDD.

    The WSL is similar to a virtual machine inside of Windows, but it integrates with some existing components of Windows. You can access your Windows files from Linux at /mnt/, but you should make sure you're familiar with Linux first.

    • About the WSL
    • WSL FAQ
    • How to Access WSL Linux Files from Windows
    "},{"location":"ctf.html#submission","title":"Submission","text":"

    All questions will be posted to the CTF github repository. In the second class, we will use git to download this repository locally, and it will be used to host the files and data needed to respond to questions.

    The CTF server's IP address is 34.155.94.97. You can see a leaderboard there and it is the address for submitting answers. The first way we'll look at submitting answers is with curl in Linux.

    Once you have a Unix-type environment, either native Linux or macOS, or through the WSL, you're ready to submit to the CTF. You will use the curl command; you can verify that you have curl by running which curl in the command line. curl is a tool for transferring data from or to a server. How do you know that? By checking the documentation of curl using man curl. Try it out!

    To respond to a question, send a POST request with the question number (number), your answer (answer), and your username (user); your username should be your ISAE login, but you can also check it on the leaderboard. For example, the first question asks where the curl executable is (hint: use which). Then use curl:

    curl -X POST 'http://34.155.94.97/' \\\n    -d 'number=1' \\\n    -d 'answer=your answer here' \\\n    -d 'user=your username here'\n

    Some of the questions will require access to some files, called file_a.txt, file_b.txt, and file_c.txt. Those are available on the CTF git repository.

    You are ready to start answering questions! If you don't know an answer, check the resources below and read documentation using man.

    You can see which questions you have answered by sending a GET request:

    curl 'http://34.155.94.97/user/d.wilson'\n

    You can also see which questions still have their flag remaining, that is, the bonus awarded for being the first to answer correctly, with a GET request:

    curl 'http://34.155.94.97/answers/'\n
    "},{"location":"ctf.html#python-submission","title":"Python Submission","text":"

    Note that you can use the requests library to submit responses:

    import requests\ndata = {\"number\": \"1\",\n        \"answer\": \"\",\n        \"user\": \"d.wilson\"}\nr = requests.post(\"http://34.155.94.97/\", data=data)\n
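
    The GET endpoints shown above work the same way with requests; for example, to check which questions you have already answered (replace d.wilson with your own username):

    import requests\n\n# Same GET endpoint as the curl example above; replace d.wilson with your username\nr = requests.get(\"http://34.155.94.97/user/d.wilson\")\nprint(r.status_code)\nprint(r.text)\n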
    "},{"location":"ctf.html#bash-resources","title":"Bash Resources","text":"
    • ISAE class on CLI, Linux, and Bash
    • Shell class from MIT
    • Bash exercises
    • More bash exercises
    • Short exercises in regular expressions
    "},{"location":"ctf.html#linux-tools","title":"Linux tools","text":"

    Now that you're an expert in Linux, let's quickly look at some useful tools. You may need to install some of these, using apt, brew, yum, pacman, or whichever package manager you use. Linux comes with many programs installed by default, especially distributions like Ubuntu; however, the tools in this section will be more useful than the base Linux tools. We'll cover four: apt for package management, top for system monitoring, tmux for terminal management, and vim for file editing. There are great alternatives to all of these programs, but it is worth being familiar with these four.

    "},{"location":"ctf.html#linux-resources","title":"Linux Resources","text":"
    • apt manual
    • Alternatives to top
    • Guide to tmux
    • tmux cheat sheet
    • Editors from MIT class
    • Vim adventures
    • tldr, short man pages
    "},{"location":"ctf.html#git","title":"Git","text":"

    Git is a version control system used worldwide for maintaining code, documents, video games, and much more. It has seen wide adoption with servers like Github and Gitlab while being an open-source tool that anyone can install as a client or server. In this class, we will look at repositories hosted on Github, but git is much larger than that and many organizations like ISAE have their own private git server.

    "},{"location":"ctf.html#installation","title":"Installation","text":"

    If you're using Ubuntu, chances are you already have git. If not, simply do:

    sudo apt install git

    These questions concern two repositories: the Machine Learning class in SDD (https://github.com/SupaeroDataScience/machine-learning) and the Seaborn library, a popular graphing library (https://github.com/mwaskom/seaborn). You will need to download both repositories. First choose a directory to host them in, for example ~/SDD/FSD312:

    mkdir -p ~/SDD/FSD312\ncd ~/SDD/FSD312\n

    and then download them using git clone:

    git clone https://github.com/SupaeroDataScience/machine-learning.git\ngit clone https://github.com/mwaskom/seaborn.git\n

    The commit for all questions on the seaborn repository is 1e6739:

    git checkout 1e6739\n
    "},{"location":"ctf.html#git-resources","title":"Git Resources","text":"
    • Git course
    • Introduction to github
    • Github video course
    • Learn git branching
    • Git SCM book
    • Git cheat sheet
    "},{"location":"ctf.html#git-exercise","title":"Git Exercise","text":"

    In order to access the server for the next parts of the CTF, you will need to provide your public ssh key. The SSH section has references explaining public-key cryptography, but in general you will make a key pair with a private side and public side. You will give the public side to services like this class or Github to perform secure communication, keeping your private key secret to prove that it is you.

    First, start by making a key pair and uploading your public key to Github. This will allow you to use SSH to push, instead of using a personal access token. Create an SSH key and add it to your Github account.

    Then, we will use git as a way for you to transfer your public key to the class. We could use another means, like a USB key, email, or a very large QR code, but for this exercise we will use git. First make a fork of the https://github.com/SupaeroDataScience/ctf2024 repository. Then, make a pull request with your key as a file in keys/. Please name your key with your name, like the example keys/dennis-wilson.pub. Be sure to upload only your public key. Do not ever upload your private key to public servers.

    Once your key is in the repository, you are ready for the SSH and Python portions of the CTF.

    "},{"location":"ctf.html#ssh","title":"SSH","text":"

    For the ssh section, you will connect to the CTF server to answer questions about the remote environment. Your public key must be uploaded to the git repository above to get access to the server. You will use the corresponding private key to access the server. Your user on the server is ctf and the IP is the same as the CTF webserver: 34.155.94.97.

    Please note that ISAE-EDU and ethernet block ssh to most servers, including this one and github.com. In order to ssh to the server, you will need to either use the eduroam network or a different network like a mobile hotspot.

    "},{"location":"ctf.html#ssh-resources","title":"SSH Resources","text":"
    • Ubuntu ssh manual
    • Guide in French
    • Cryptographie Asym\u00e9trique
    • How SSH works
    "},{"location":"ctf.html#python","title":"Python","text":"

    An overview and reminder of the python programming language, with a focus on numpy and pandas manipulation using Jupyter.

    "},{"location":"ctf.html#installation_1","title":"Installation","text":"

    You most likely have python installed on your Linux system, but it is worthwhile to make sure and to upgrade. Python 3.8, 3.9, or 3.10 are all supported.

    sudo apt install python3\n

    It is highly recommended to make a virtual environment to manage your python packages. There are three main libraries for virtual environments:

    • Virtualenv
    • Pipenv
    • Conda

    Virtualenv is recommended for new users on Linux. Conda, or the platform Anaconda, can be useful on Windows as many packages are built specifically for Windows, but not all packages are available via conda. Pipenv is an exciting project aimed at Python developers, but it adds additional complexity.

    Once you have a virtual environment created, please install the following packages for the rest of the Seminars class:

    numpy\npandas\nscipy\nmatplotlib\njupyter\n

    The following packages will also be used in SDD:

    seaborn\nscikit-learn\nkeras\ntorch\ngeos\ngraphviz\nnltk\nnetworkx\nstatsmodels\npyspark\ncython\ncma\ngym\n
    "},{"location":"ctf.html#jupyter","title":"Jupyter","text":"

    Jupyter (stands for the three original languages in the project: Julia, Python, and R) is a way to use and develop code interactively in the browser. Once you've installed the jupyter package, you can run a Jupyter notebook by simply running jupyter notebook.

    For Windows users, you can run Jupyter in the WSL. As explained in this blog post, you simply need to execute jupyter notebook --no-browser on the WSL and then copy and paste the URL and token generated into a Windows browser.

    Some additional packages for improving Jupyter are nbopen, nbdime, and RISE. Be sure to read their documentation before installing to verify if these are relevant to you.

    "},{"location":"ctf.html#python-resources","title":"Python Resources","text":"
    • Python 3 Documentation
    • Pip documentation
    • Pandas cheatsheet
    • Stanford Python and Numpy tutorial
    • Python seminar
    • Google Colab: Jupyter notebooks on the cloud
    • Binder: Also Jupyter notebooks on the cloud, not hosted by Google
    "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index fbb5882..cd79fbc 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,147 +2,147 @@ https://supaerodatascience.github.io/DE/index.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/0_1_databases.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/0_2_ETL.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/0_3_dbms.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/0_3_postgres.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/0_4_project.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_1_overview.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_2_cloud.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_2_setup_codespace.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_3_gcp_handson.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_3_gcp_lecture.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_4_containers.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_4_docker_tp.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_5_be.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_6_conclusion.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_7_readings.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_8_deployment.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_8_deployment_tp.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/1_8_deployment_tp_long.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_1_overview.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_2_orchestration.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_3_kub_handson.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_4_functional.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_5_mapreduce.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_6_spark.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_7_cloud.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_8_dask.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/2_9_project.html - 2024-11-05 + 2024-11-12 daily https://supaerodatascience.github.io/DE/ctf.html - 2024-11-05 + 2024-11-12 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index bdab91d7f37643b8102b05212fb32bf4bfc998e9..5201fb89a0e5364f1ce5e4422b43f7676ac1956c 100644 GIT binary patch delta 446 zcmV;v0YU!H1J45oABzYGLESTv2OWR0ny&5OxR-UO9lNt2ON^~WmNZdn+P5Fw7DKS> zfCALTHcitX1d9CGKfEk{^9&hX42Py`cg+TZiPnYb(ENP*zP)Q6hO542k5D$`9PrT0 zrKGPyoX_VLU4ca>Y-64TnwcI@d>F;9y>EuA4Xri6``z2>eKf-w1t|>Hq#=JttBXeC z=FQg$ov5+PN&g7wdPIVKkN{+ClE*e%p21PLj)q?ZVB>I!tiP@O!Y@ z`PW`tSl;=>U%>xC-KkEkFGr1lT#%)DeEPu!_FCC-K|Zu8Vwp0ZWZ0{e<1a9U2XIFR zE>ih=c5voGES2%GT0Xc``D=et)D!@dCI=?q*J?6JQFMzj@PhVq0BdHm(|byeP_zrA zBls}eXikt-NnDU`)W{|!zN{%>d|h=V7v@`)L9fb}O%mTJ3(3dV1w!G-%;@m_gROiF z6AyODP9sSY9Wm1|i6!g8rh-W7-lsb9k{psc;jsp@ZnO4Wm~YggQwAcEg1M(@viI@}s0EQXg(f|Me delta 446 zcmV;v0YU!H1J45oABzYGlNl 
zfCALTHcitX1d9AQ+`q1F^I{U%;15mTcFo3k6}0v9q51XnV|&}&k5@xU4ou#lvxh^o z+q{&iwShf6GV8T@ZI`^dfz|IW!$`<$&?E2oZrk_UZjUaXwhK44NI#onGS8!y z*1Zkv!t%k#{sR6FYA1S8UrrJXP(kM6@#!ZO*lVGu73k2$0C`Syl3^!e3croYG=O_N zZ~@ELi#2B|#9SyBil&2$mA`*Pfla|cl;}VN{94Q^3XHBHcv{d74`4}Tc6x`&;j^?J zbp#z|9n{Grks}r4y_m?P#J8Ff%9W}ssW9J&WMoyoWRmz+XcJv{TaC{YnF$@ff3SrM zex||B$!R2xq9Y_6CLt%8$y5-I-TPD~T9SifCp;Ff>NZP9g?TSl8Ivy2g^C>I*bz}Z orxO+28%k3OujN?+na#bUHD#-O{2I{r3yz!PACOAwPnZ+{0J=ojegFUf diff --git a/slides/dist/theme/fonts/league-gothic/league-gothic.eot b/slides/dist/theme/fonts/league-gothic/league-gothic.eot old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/league-gothic/league-gothic.ttf b/slides/dist/theme/fonts/league-gothic/league-gothic.ttf old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/league-gothic/league-gothic.woff b/slides/dist/theme/fonts/league-gothic/league-gothic.woff old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.eot b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.eot old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.ttf b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.ttf old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.woff b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-italic.woff old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.eot b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.eot old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.ttf b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.ttf old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.woff b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-regular.woff old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.eot b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.eot old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.ttf b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.ttf old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.woff b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibold.woff old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.eot b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.eot old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.ttf b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.ttf old mode 100755 new mode 100644 diff --git a/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.woff b/slides/dist/theme/fonts/source-sans-pro/source-sans-pro-semibolditalic.woff old mode 100755 new mode 100644 diff --git a/slides/plugin/markdown/plugin.js b/slides/plugin/markdown/plugin.js old mode 100755 new mode 100644 diff --git a/slides/plugin/math/katex.js b/slides/plugin/math/katex.js old mode 100755 new mode 100644 diff --git a/slides/static/.DS_Store b/slides/static/.DS_Store deleted file mode 100644 index 
d25f20d5c1ce3f4ca42fb9444d396adcce4d578e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHMU2GIp6u#fIg&7#yDL>Kybe1g0Un@&de#LCNErL*~Z2wzXc6UaYFgsIsX18cX zt55y}<4;VCQ4?RR|4%5!sKf`9s1L>z6EO0iFPLbe@?_$JXXeiG)6ypsqv_ma?z!jO zbI!~;^PRbS?=r^FTGBT#*1#AO>GG*Em%8gTf8M{QG$EAK5Cz#YcQ|J`IpR*{hNoyp zf+zz~2BHi^8Hh3vW#GS%0a~;DCQh^N^Vz76G7x28MrOeOKE&wqX)>f^oCco`>Vho* z(P|P0jmC;UAQ}s4GNfai(hN0lQ=;4y;T8kjoaC`kFB#G?PPsWlxcNYMXM{Tx1a~L@ zu|S<6$!XL_8Hh44H3Rau8Y3BHVal<+tZ5s?z`)cEQW@$sZQU7ZbxOAG2DWKFAxe^z zQ5KJmCK_WK5*v>-#zr@6Zr;)u+q!wvv15|BD87FC{>%}pU^`Fn=STPrVDl!$^W>Fa zbT@D1=;3_TFh0vIr}+6qRda5b$9KLGpEPH#JWuWE?dj{6Gs^rj1+s70GacLQF^VqH zj8f-Pz`OH~-B~pJ`QE%^l?t|-QR?$qD_^p*p3!1iY4bsYWOZ)7=vY>#iKh1ruqh4r|6{uXMHC5=UPh1HTE0i)GQlq7wQFzZ zM4eJUe}TM69-z$Ho_U{X6#KJo-mrDkKGZ#8>fYhrvgw*R%jmMR1w&HCrNF2`U844t za_0Rd(|bTv;^M@7rM^y9wU{I-?+@v8`wmC<9qPf1s%k4lanP5iiU##uEUT;6kU_S~ zr8v0@x#<>3Qrxa1^!B-_+`Lhg6usNZ<_s$oZQeqnJ-ua~>QH$54pFK{V_)7W6et{* zi`AA^QB+PH@bA4h>w5c!4g2y_?Yxub&J4JRvqfXy2mvs(*||p)gBM70->6+TJ*OCi z-l1x0zwfL?*9~3dk*jLDi#&asQ54fIn*@1!g&g@}yz(02$wod5av4u1I499cb|Y(M zZEP<)$ck)~9cM4H*VqYmntjgBvJ32Ic8UE7U>0UW!W=BXGTaCaF|5H_tV0vFA%!-y z;~orP5JNbCJS-GoV;Choj7Knr$M86w#dCNbFW?otj&Zz!w{ZfW;8UE!XZQx^a30^| z2VBJO_yd=O8lhHb5L97_5EoVmD}`0UHetKaCUgkSbP#&ORGCZlk zmM)WT(z>Y(jHA;Pr83(Gyg1=&FPGz*QbUEp*_-9nnlg)ugtP0kCQT8jHaVNnnq{Sm zDo+Y*Z_}D&1tMp*%G+h7nrcv*SgI~*kmY0xRphwB0@ak(t|@*&iT@DT=h%Mjb-m|^P2$9Y+7TDktbKG+DJ5%lMdk(diJondijK5*>=z?aUY34yhb7rUI!zSAl(>j