Skip to content

Commit

Permalink
Updated docs and dev setup files
Browse files Browse the repository at this point in the history
  • Loading branch information
simon-20 committed May 17, 2024
1 parent afcf402 commit b952952
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 46 deletions.
5 changes: 0 additions & 5 deletions .env

This file was deleted.

29 changes: 29 additions & 0 deletions .env-example
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Used for local development, including when doing VS Code Debugging (referenced by .vscode/launch.json)

DB_USER=refresh
DB_PASS=
DB_HOST=localhost
DB_NAME=refresher
DB_PORT=5432
DB_SSL_MODE=disable

AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;TableEndpoint=http://localhost:10002/devstoreaccount1;"
AZURE_STORAGE_CONTAINER_SOURCE=source
AZURE_STORAGE_CONTAINER_CLEAN=clean
ACTIVITIES_LAKE_CONTAINER_NAME=lake

SCHEMA_VALIDATION_API_URL=VALIDATOR_HOSTNAME_HERE/api/pvt/schema-validate-file
SCHEMA_VALIDATION_KEY_NAME=x-functions-key
SCHEMA_VALIDATION_KEY_VALUE=

VALIDATOR_API_URL=VALIDATOR_HOSTNAME_HERE/api/pub/validate
VALIDATOR_API_KEY_NAME=x-functions-key
VALIDATOR_API_KEY_VALUE=

SOLR_API_URL=http://localhost:8983/solr/
SOLR_USER=x
SOLR_PASSWORD=y
SOLR_PARALLEL_PROCESSES=5
SOLR_500_SLEEP=60

LOG_LEVEL=debug
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ cython_debug/

.vscode/*
!.vscode/extensions.json
!.vscode/launch-example.json
pyenv

scratch
137 changes: 137 additions & 0 deletions .vscode/launch-example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
{


// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [

{ // Pipeline - 01 - Refresh
"name": "Pipeline - 01 - Refresh",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "refresh"],
"envFile": "${workspaceFolder}/.env"

// "env": {
// "DB_USER": "",
// "DB_PASS": "",
// "DB_HOST": "",
// "DB_NAME": "",
// "DB_PORT": "",
// "DB_SSL_MODE": "disable",

// "AZURE_STORAGE_CONNECTION_STRING": "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;QueueEndpoint=http://localhost:10001/devstoreaccount1;TableEndpoint=http://localhost:10002/devstoreaccount1;",
// "AZURE_STORAGE_CONTAINER_SOURCE": "source",
// "AZURE_STORAGE_CONTAINER_CLEAN": "clean",
// "ACTIVITIES_LAKE_CONTAINER_NAME": "lake",

// "SCHEMA_VALIDATION_API_URL": "VALIDATOR_HOSTNAME_HERE/api/pvt/schema-validate-file",
// "SCHEMA_VALIDATION_KEY_NAME": "x-functions-key",
// "SCHEMA_VALIDATION_KEY_VALUE": "",

// "VALIDATOR_API_URL": "VALIDATOR_HOSTNAME_HERE/api/pub/validate",
// "VALIDATOR_API_KEY_NAME": "x-functions-key",
// "VALIDATOR_API_KEY_VALUE": "",

// "SOLR_API_URL": "http://localhost:8983/solr/",
// "SOLR_USER": "x",
// "SOLR_PASSWORD": "y",
// "LOG_LEVEL": "debug",
// "SOLR_PARALLEL_PROCESSES": "5",
// "SOLR_500_SLEEP": "60"
// }
},

{ // Pipeline - 02 - Reload
"name": "Pipeline - 02 - Reload",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "reload"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 04 - Validate (against local)
"name": "Pipeline - 04 - Validate (against local)",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "validate"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 04 - Validate (using Azure dev)
"name": "Pipeline - 04 - Validate (against dev)",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "validate"],
"envFile": "${workspaceFolder}/.env-dev-validator"
},


{ // Pipeline - 05 - Copy valid
"name": "Pipeline - 05 - Copy valid",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "copy_valid"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 06 - Clean invalid
"name": "Pipeline - 06 - Clean invalid",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "clean_invalid"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 07 - Flatten
"name": "Pipeline - 07 - Flatten",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "flatten"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 08 - Lakify
"name": "Pipeline - 08 - Lakify",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "lakify"],
"envFile": "${workspaceFolder}/.env"
},

{ // Pipeline - 09 - Solrize
"name": "Pipeline - 09 - Solrize",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"justMyCode": false,
"args": ["-t", "solrize"],
"envFile": "${workspaceFolder}/.env"
},
],
"compounds": [
{
"name": "Compound",
"configurations": []
}
]
}
53 changes: 12 additions & 41 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,56 +45,27 @@ Before running any task, the Refresher checks the version of the database agains

## Create Local Database

- Creates a database called `refresher` owned by `refresh`
 `createdb refresher -O refresh`
- Create a database called `refresher` owned by `refresh`:

## launch.json

Setup a `.vscode/launch.json` to run locally with attached debugging like so:

```json
{
"configurations": [
{
"name": "Refresh - Local",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/src/handler.py",
"console": "integratedTerminal",
"args": ["-t", "refresh"],
"env": {
"AZURE_STORAGE_CONNECTION_STRING": "",
"AZURE_STORAGE_CONTAINER_SOURCE": "",
"SOLR_API_URL": "http://localhost:8983/solr/",
"DB_USER": "refresh",
"DB_PASS": "",
"DB_HOST": "localhost",
"DB_NAME": "refresher",
"DB_PORT": "5432",
"DB_SSL_MODE": "disable",
"PARALLEL_PROCESSES": "10"
}
}
```
 `createdb refresher -O refresh`

## Environment Variables

See `src/constants/config.py` for all Environment Variables and Constants with descriptions. Additional information can be found below as well.
The canonical source for environment variables and constants is `src/constants/config.py`.

To get started, copy `.env-example` to `.env` and fill in the values as needed. At a minimum, you'll need to set the database password (the other database values are pre-filled for the unified pipeline Docker setup).

If you want to run the validate stage, you must replace `VALIDATOR_HOSTNAME_HERE` with the validator you want to use (e.g., a local copy, or the dev instance).

The `.env` file is referenced by VS Code's `launch.json`, and so is also used to set up the environment for interactive debugging sessions with VS Code.

### AZURE_STORAGE_CONNECTION_STRING

- This can be found in the Azure Portal > Storage Account > Access Keys or by running `az storage account show-connection-string -g MyResourceGroup -n MyStorageAccount`
## VS Code `launch.json`

### DB\_\*
If using VS Code, copy `.vscode/launch-example.json` to `.vscode/launch.json`. This will allow you to run and debug each of the different pipeline stages from within VS Code. This file references the `.env` file, and different launch configurations can use different environment files, as the two validate configurations in the example file show.

Example for connecting to local db you made above:
Note that you can override values from the `.env` file by adding an `env` key after the `envFile` key in a launch configuration; an example of this is shown (commented out) in the first launch configuration in the `launch-example.json` file.

- "DB_USER": "refresh",
- "DB_PASS": "",
- "DB_HOST": "localhost",
- "DB_NAME": "refresher",
- "DB_PORT": "5432",
- "DB_SSL_MODE": "disable" - leaving blank with default to "require"

# Services

Expand Down

0 comments on commit b952952

Please sign in to comment.