changes in build
vggonzal authored and vggonzal committed Sep 29, 2023
1 parent 2ed36be commit cd05fdb
Showing 2 changed files with 257 additions and 0 deletions.
93 changes: 93 additions & 0 deletions .github/workflows/build.yml
@@ -160,3 +160,96 @@ jobs:
run: |
poetry run pytest tests/test_api.py -k 'test_gettimeseries_get'
poetry run pytest tests/test_api.py -k 'test_getsubset_get'
## Set environment variables
- name: Configure Initial YAML file and environment variables
run: |
echo "THE_VERSION=${{ env.software_version }}" >> $GITHUB_ENV;
echo "GIT_BRANCH=${GITHUB_REF#refs/heads/}" >> $GITHUB_ENV;
GITHUB_REF_READABLE="${GITHUB_REF//\//-}"
echo "GITHUB_REF_READABLE=${GITHUB_REF_READABLE}" >> $GITHUB_ENV
echo "THE_ENV=sit" >> $GITHUB_ENV
echo "TARGET_ENV_UPPERCASE=SIT" >> $GITHUB_ENV
# Setup docker to build and push images
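      # github.ref is fully qualified (e.g. refs/heads/test), so the branch filters below match on the refs/heads/ prefix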
- name: Log in to the Container registry
        if: ${{ startsWith(github.ref, 'refs/heads/test') }}
uses: docker/login-action@v1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Extract metadata (tags, labels) for Docker
        if: ${{ startsWith(github.ref, 'refs/heads/test') }}
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
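          # Tag the image with both the semver release version and the target environment (e.g. "sit")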
tags: |
type=semver,pattern={{version}},value=${{ env.THE_VERSION }}
type=raw,value=${{ env.THE_ENV }}
- name: Build and push Docker image
        if: ${{ startsWith(github.ref, 'refs/heads/test') }}
#if: |
# github.ref == 'refs/heads/develop' ||
# github.ref == 'refs/heads/main' ||
# startsWith(github.ref, 'refs/heads/release') ||
# github.event.head_commit.message == '/deploy sit' ||
# github.event.head_commit.message == '/deploy uat'
uses: docker/build-push-action@v3
with:
context: .
file: docker/Dockerfile
push: true
pull: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}


# Setup Terraform to Deploy
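      # The secrets[format(...)] expressions build the secret name from the target environment,
      # e.g. TARGET_ENV_UPPERCASE=SIT selects secrets.AWS_ACCESS_KEY_ID_SERVICES_SIT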

      - name: Configure AWS Credentials as Environment Variables
        run: |
          echo "AWS_ACCESS_KEY_ID=${{ secrets[format('AWS_ACCESS_KEY_ID_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}" >> $GITHUB_ENV
          echo "AWS_SECRET_ACCESS_KEY=${{ secrets[format('AWS_SECRET_ACCESS_KEY_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}" >> $GITHUB_ENV

- name: Validate AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-region: us-west-2
role-session-name: GitHubActions
env:
AWS_ACCESS_KEY_ID: ${{ secrets[format('AWS_ACCESS_KEY_ID_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}
AWS_SECRET_ACCESS_KEY: ${{ secrets[format('AWS_SECRET_ACCESS_KEY_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}
- run: aws sts get-caller-identity

      - uses: hashicorp/setup-terraform@v2
with:
terraform_version: 1.0.3

- name: Deploy Terraform
#if: |
# github.ref == 'refs/heads/develop' ||
# github.ref == 'refs/heads/main' ||
# startsWith(github.ref, 'refs/heads/release') ||
# github.event.head_commit.message == '/deploy sit' ||
# github.event.head_commit.message == '/deploy uat'
working-directory: terraform/
env:
AWS_ACCESS_KEY_ID: ${{ secrets[format('AWS_ACCESS_KEY_ID_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}
AWS_SECRET_ACCESS_KEY: ${{ secrets[format('AWS_SECRET_ACCESS_KEY_SERVICES_{0}', env.TARGET_ENV_UPPERCASE)] }}
AWS_DEFAULT_REGION: us-west-2

TF_VAR_hydrocronapi_api_docker_image: "ghcr.io/podaac/hydrocron:${{ env.THE_VERSION }}"
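          # Terraform reads TF_VAR_* environment variables as input variables (here, the image pushed above)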

run: |
echo "-------"
echo ${{ env.THE_ENV }}
echo ${{ env.THE_VERSION }}
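          # Point Terraform at the per-environment state backend, then plan and apply with the matching tfvars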
terraform init -reconfigure -backend-config="bucket=podaac-services-sit-terraform" -backend-config="region=us-west-2"
terraform plan -var-file=tfvars/${{ env.THE_ENV }}.tfvars -var="app_version=${{ env.THE_VERSION }}" -out="tfplan"
terraform apply -auto-approve tfplan
164 changes: 164 additions & 0 deletions tests/example_load_data.py
@@ -0,0 +1,164 @@
"""
This module searches for new granules and loads data into
the appropriate DynamoDB table
"""
import logging
import argparse
import sys

import boto3
import earthaccess
from hydrocron_db.hydrocron_database import HydrocronDB
from hydrocron_db.hydrocron_database import DynamoKeys
from hydrocron_db.io import swot_reach_node_shp


def parse_args():
"""
Argument parser
"""
parser = argparse.ArgumentParser()

parser.add_argument("-t", "--table-name", dest='table_name', required=True,
help="The name of the database table to add data")
parser.add_argument("-sd", "--start_date", dest="start", required=False,
help="The ISO date time after which data should be retrieved. For Example, --start-date 2023-01-01T00:00:00Z") # noqa E501
parser.add_argument("-ed", "--end-date", required=False, dest="end",
help="The ISO date time before which data should be retrieved. For Example, --end-date 2023-02-14T00:00:00Z") # noqa E501

return parser.parse_args()
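
# Example invocation (dates are illustrative; table names match the cases
# handled in run() below):
#   python tests/example_load_data.py -t hydrocron-swot-reach-table \
#       --start-date 2023-01-01T00:00:00Z --end-date 2023-02-14T00:00:00Z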


def setup_connection():
"""
Set up DynamoDB connection
Returns
-------
dynamo_instance : HydrocronDB
"""
session = boto3.session.Session()
dyndb_resource = session.resource('dynamodb')

dynamo_instance = HydrocronDB(dyn_resource=dyndb_resource)

return dynamo_instance


def find_new_granules(collection_shortname, start_date, end_date):
"""
Find granules to ingest
Parameters
----------
collection_shortname : string
The shortname of the collection to search
Returns
-------
granule_paths : list of strings
List of S3 paths to the granules that have not yet been ingested
"""
auth = earthaccess.login()

cmr_search = earthaccess.DataGranules(auth). \
short_name(collection_shortname).temporal(start_date, end_date)

results = cmr_search.get()

granule_paths = [g.data_links(access='direct') for g in results]
return granule_paths
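
# Usage sketch (hypothetical dates; assumes Earthdata credentials are
# available to earthaccess.login()):
#   paths = find_new_granules("SWOT_L2_HR_RIVERSP_1.0",
#                             "2023-01-01T00:00:00Z",
#                             "2023-02-14T00:00:00Z")
#   paths[0] holds the direct-access links for the first granule found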


def load_data(hydrocron_table, granule_path):
"""
Create table and load data
hydrocron_table : HydrocronTable
The table to load data into
granules : list of strings
The list of S3 paths of granules to load data from
"""
print(granule_path)
if hydrocron_table.table_name == "hydrocron-swot-reach-table":
if 'Reach' in granule_path:
items = swot_reach_node_shp.read_shapefile(granule_path)

for item_attrs in items:
# write to the table
hydrocron_table.add_data(**item_attrs)

elif hydrocron_table.table_name == "hydrocron-swot-node-table":
if 'Node' in granule_path:
items = swot_reach_node_shp.read_shapefile(granule_path)

for item_attrs in items:
# write to the table
hydrocron_table.add_data(**item_attrs)

    else:
        logging.warning(
            "Items cannot be parsed, file reader not implemented for table %s",
            hydrocron_table.table_name)


def run(args=None):
"""
Main function to manage loading data into Hydrocron
"""
if args is None:
args = parse_args()

table_name = args.table_name
start_date = args.start
end_date = args.end

match table_name:
case "hydrocron-swot-reach-table":
collection_shortname = "SWOT_L2_HR_RIVERSP_1.0"
pkey = 'reach_id'
pkey_type = 'S'
skey = 'range_start_time'
skey_type = 'S'
case "hydrocron-swot-node-table":
collection_shortname = "SWOT_L2_HR_RIVERSP_1.0"
pkey = 'node_id'
pkey_type = 'S'
skey = 'range_start_time'
skey_type = 'S'
        case _:
            logging.warning(
                "Hydrocron table '%s' does not exist.", table_name)
            return

dynamo_instance = setup_connection()

if dynamo_instance.table_exists(table_name):
hydrocron_table = dynamo_instance.load_table(table_name)
else:
logging.info("creating new table... ")
dynamo_keys = DynamoKeys(
partition_key=pkey,
partition_key_type=pkey_type,
sort_key=skey,
sort_key_type=skey_type)

hydrocron_table = dynamo_instance.create_table(table_name, dynamo_keys)

new_granules = find_new_granules(
collection_shortname,
start_date,
end_date)

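    # Each entry from find_new_granules() is a list of direct-access links
    # for one granule; ingest from the first link of each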
for granule in new_granules:
load_data(hydrocron_table, granule[0])


def main():
    try:
        run()
    except Exception:  # pylint: disable=broad-except
        logging.exception("Uncaught exception occurred during execution.")
        sys.exit(1)


if __name__ == "__main__":
main()
