From b952952ea3707c73e9a6131e473fc0a5585a1fca Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Fri, 17 May 2024 14:38:47 +0100 Subject: [PATCH] Updated docs and dev setup files --- .env | 5 -- .env-example | 29 ++++++++ .gitignore | 1 + .vscode/launch-example.json | 137 ++++++++++++++++++++++++++++++++++++ README.md | 53 ++++---------- 5 files changed, 179 insertions(+), 46 deletions(-) delete mode 100644 .env create mode 100644 .env-example create mode 100644 .vscode/launch-example.json diff --git a/.env b/.env deleted file mode 100644 index 27ef84c..0000000 --- a/.env +++ /dev/null @@ -1,5 +0,0 @@ -DATABASE_URL= -S3_REGION= -S3_HOST= -S3_KEY= -S3_SECRET= diff --git a/.env-example b/.env-example new file mode 100644 index 0000000..22522d9 --- /dev/null +++ b/.env-example @@ -0,0 +1,29 @@ +# Used for local development, including when doing VS Code Debugging (referenced by .vscode/launch.json) + +DB_USER=refresh +DB_PASS= +DB_HOST=localhost +DB_NAME=refresher +DB_PORT=5432 +DB_SSL_MODE=disable + +AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;TableEndpoint=http://localhost:10002/devstoreaccount1;" +AZURE_STORAGE_CONTAINER_SOURCE=source +AZURE_STORAGE_CONTAINER_CLEAN=clean +ACTIVITIES_LAKE_CONTAINER_NAME=lake + +SCHEMA_VALIDATION_API_URL=VALIDATOR_HOSTNAME_HERE/api/pvt/schema-validate-file +SCHEMA_VALIDATION_KEY_NAME=x-functions-key +SCHEMA_VALIDATION_KEY_VALUE= + +VALIDATOR_API_URL=VALIDATOR_HOSTNAME_HERE/api/pub/validate +VALIDATOR_API_KEY_NAME=x-functions-key +VALIDATOR_API_KEY_VALUE= + +SOLR_API_URL=http://localhost:8983/solr/ +SOLR_USER=x +SOLR_PASSWORD=y +SOLR_PARALLEL_PROCESSES=5 +SOLR_500_SLEEP=60 + +LOG_LEVEL=debug diff --git a/.gitignore b/.gitignore index 
e074838..80c77be 100644 --- a/.gitignore +++ b/.gitignore @@ -139,6 +139,7 @@ cython_debug/ .vscode/* !.vscode/extensions.json +!.vscode/launch-example.json pyenv scratch diff --git a/.vscode/launch-example.json b/.vscode/launch-example.json new file mode 100644 index 0000000..3b5b155 --- /dev/null +++ b/.vscode/launch-example.json @@ -0,0 +1,137 @@ +{ + + + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { // Pipeline - 01 - Refresh + "name": "Pipeline - 01 - Refresh", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "refresh"], + "envFile": "${workspaceFolder}/.env" + + // "env": { + // "DB_USER": "", + // "DB_PASS": "", + // "DB_HOST": "", + // "DB_NAME": "", + // "DB_PORT": "", + // "DB_SSL_MODE": "disable", + + // "AZURE_STORAGE_CONNECTION_STRING": "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;TableEndpoint=http://localhost:10002/devstoreaccount1;", + // "AZURE_STORAGE_CONTAINER_SOURCE": "source", + // "AZURE_STORAGE_CONTAINER_CLEAN": "clean", + // "ACTIVITIES_LAKE_CONTAINER_NAME": "lake", + + // "SCHEMA_VALIDATION_API_URL": "VALIDATOR_HOSTNAME_HERE/api/pvt/schema-validate-file", + // "SCHEMA_VALIDATION_KEY_NAME": "x-functions-key", + // "SCHEMA_VALIDATION_KEY_VALUE": "", + + // "VALIDATOR_API_URL": "VALIDATOR_HOSTNAME_HERE/api/pub/validate", + // "VALIDATOR_API_KEY_NAME": "x-functions-key", + // "VALIDATOR_API_KEY_VALUE": "", + + // "SOLR_API_URL": "http://localhost:8983/solr/", + // "SOLR_USER": "x", + // "SOLR_PASSWORD": "y", + // "LOG_LEVEL": "debug", + 
// "SOLR_PARALLEL_PROCESSES": "5", + // "SOLR_500_SLEEP": "60" + // } + }, + + { // Pipeline - 02 - Reload + "name": "Pipeline - 02 - Reload", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "reload"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 04 - Validate + "name": "Pipeline - 04 - Validate (against local)", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "validate"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 04 - Validate (using Azure dev) + "name": "Pipeline - 04 - Validate (against dev)", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "validate"], + "envFile": "${workspaceFolder}/.env-dev-validator" + }, + + + { // Pipeline - 05 - Copy valid + "name": "Pipeline - 05 - Copy valid", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "copy_valid"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 06 - Clean invalid + "name": "Pipeline - 06 - Clean invalid", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "clean_invalid"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 07 - Flatten + "name": "Pipeline - 07 - Flatten", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "args": ["-t", "flatten"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 08 - Lakify + "name": "Pipeline - 08 - Lakify", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + 
"args": ["-t", "lakify"], + "envFile": "${workspaceFolder}/.env" + }, + + { // Pipeline - 09 - Solrize + "name": "Pipeline - 09 - Solrize", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/handler.py", + "console": "integratedTerminal", + "justMyCode": false, + "args": ["-t", "solrize"], + "envFile": "${workspaceFolder}/.env" + }, + ], + "compounds": [ + { + "name": "Compound", + "configurations": [] + } + ] +} diff --git a/README.md b/README.md index 0107150..f7413c5 100644 --- a/README.md +++ b/README.md @@ -45,56 +45,27 @@ Before running any task, the Refresher checks the version of the database agains ## Create Local Database -- Creates a database called `refresher` owned by `refresh` -  `createdb refresher -O refresh` +- Create a database called `refresher` owned by `refresh`: -## launch.json - -Setup a `.vscode/launch.json` to run locally with attached debugging like so: - -```json -{ - "configurations": [ - { - "name": "Refresh - Local", - "type": "python", - "request": "launch", - "program": "${workspaceFolder}/src/handler.py", - "console": "integratedTerminal", - "args": ["-t", "refresh"], - "env": { - "AZURE_STORAGE_CONNECTION_STRING": "", - "AZURE_STORAGE_CONTAINER_SOURCE": "", - "SOLR_API_URL": "http://localhost:8983/solr/", - "DB_USER": "refresh", - "DB_PASS": "", - "DB_HOST": "localhost", - "DB_NAME": "refresher", - "DB_PORT": "5432", - "DB_SSL_MODE": "disable", - "PARALLEL_PROCESSES": "10" - } - } -``` +  `createdb refresher -O refresh` ## Environment Variables -See `src/constants/config.py` for all Environment Variables and Constants with descriptions. Additional information can be found below as well. +The canonical source for environment variables and constants is `src/constants/config.py`. + +To get started, copy `.env-example` to `.env` and fill in as needed. As a minimum, you'll need to set up the database password (the other database values are predone for the unified pipeline docker setup). 
+ +If you want to run the validate stage, you must replace `VALIDATOR_HOSTNAME_HERE` with the validator you want to use (e.g., a local copy, or the dev instance). + +The `.env` file is referenced by VS Code's `launch.json`, and so is also used to set up the environment for interactive debugging sessions with VS Code. -### AZURE_STORAGE_CONNECTION_STRING -- This can be found in the Azure Portal > Storage Account > Access Keys or by running `az storage account show-connection-string -g MyResourceGroup -n MyStorageAccount` +## VS Code `launch.json` -### DB\_\* +If using VS Code, copy `.vscode/launch-example.json` to `.vscode/launch.json`. This will allow you to run and debug each of the different pipeline stages from within VS Code. This file references the `.env` file, and you can have different launch tasks work with different environment files, as shown in the example file with two different validate stages. -Example for connecting to local db you made above: +Note that you can override values found in the `.env` file by including an `env` key after the `envFile` key in the launch configuration--an example of this is shown (commented out) in the first launch configuration in the `launch-example.json` file. -- "DB_USER": "refresh", -- "DB_PASS": "", -- "DB_HOST": "localhost", -- "DB_NAME": "refresher", -- "DB_PORT": "5432", -- "DB_SSL_MODE": "disable" - leaving blank with default to "require" # Services