Skip to content

Commit

Permalink
Support reindexing of files that are directly written to the registry. (
Browse files Browse the repository at this point in the history
#15)

This gives administrators an option to directly write to the registry and
bypass the staging directory for bulk updates. It also allows us to easily fix
the internal files after manual updates to the user-supplied files.
  • Loading branch information
LTLA authored Jan 26, 2025
1 parent 2427ae9 commit 334b920
Show file tree
Hide file tree
Showing 7 changed files with 1,209 additions and 182 deletions.
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,29 @@ This file should be JSON-formatted with the following properties:

On success, the latest version is updated and a JSON formatted file will be created in `responses` with the `status` property set to `SUCCESS`.

### Reindexing a version (admin)

Administrators of a Gobbler instance can directly reindex the contents of a version directory, regenerating the various `..manifest` and `..links` files.
This is useful for correcting the Gobbler internal files after manual changes to the user-supplied files.
It also allows for more efficient bulk uploads where administrators can write directly to the Gobbler registry and then generate the internal files afterwards,
thus avoiding an unnecessary copy from the staging directory.

To trigger a reindexing job, create a file with the `request-reindex_version-` prefix.
This file should be JSON-formatted with the following properties:

- `project`: string containing the name of the project.
- `asset`: string containing the name of the asset.
- `version`: string containing the name of the version.

On success, the internal `..manifest` and `..links` files in the version directory will be regenerated.

Note that the reindexing process assumes that the `..summary` file is already present for this version directory.
It will not modify this file so as to respect the original uploader's identity, time of upload, probational status, etc.

The reindexing process will not update the project usage as the current usage will likely be incorrect after manual changes to the version directory's contents.
Similarly, reindexing will not update the latest version for the asset, as the `..summary` files have not changed.
Administrators should refresh these statistics manually as described above after all modifications to the registry are complete.

### Deleting content (admin)

Administrators have the ability to delete files from the registry.
Expand Down Expand Up @@ -322,6 +345,8 @@ The file contains a JSON object that details the type of action in the `type` pr
This has the `project` and `asset` string properties.
- `delete-project` indicates that a project was deleted.
This has the `project` string property.
- `reindex-version` indicates that a non-probational version was reindexed.
This has the `project`, `asset` and `version` string properties to describe the version.

Downstream systems can inspect these files to determine what changes have occurred in the registry.
This is intended for systems that need to maintain a database index on top of the bucket's contents.
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ func main() {
reportable_err = deleteAssetHandler(reqpath, &globals)
} else if strings.HasPrefix(reqtype, "delete_version-") {
reportable_err = deleteVersionHandler(reqpath, &globals)
} else if strings.HasPrefix(reqtype, "reindex_version-") {
reportable_err = reindexHandler(reqpath, &globals)
} else if strings.HasPrefix(reqtype, "health_check-") { // TO-BE-DEPRECATED, see /check below.
reportable_err = nil
} else {
Expand Down
142 changes: 142 additions & 0 deletions reindex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package main

import (
"fmt"
"time"
"path/filepath"
"os"
"encoding/json"
"net/http"
)

// reindexRequest is the decoded body of a reindexing request file.
//
// The three JSON-backed fields are pointers so that a property that is absent
// from the request decodes to nil and can be told apart from an empty string.
type reindexRequest struct {
	// Project is the name of the project containing the version to reindex.
	Project *string `json:"project"`

	// Asset is the name of the asset containing the version to reindex.
	Asset *string `json:"asset"`

	// Version is the name of the version to reindex.
	Version *string `json:"version"`

	// User is never read from or written to JSON; it is filled in by the
	// code that loads the request.
	User string `json:"-"`
}

// reindexPreflight loads the request file at reqpath, determines which user
// owns that file, and validates the request contents.
//
// The returned request has non-nil Project, Asset and Version fields that have
// each passed isBadName, and has User set to the owner of the request file.
//
// A failure to read the file or to identify its owner is returned as a plain
// wrapped error. Malformed JSON, a missing property, or an invalid name is
// returned as an HTTP error with status 400 (Bad Request).
func reindexPreflight(reqpath string) (*reindexRequest, error) {
	contents, err := os.ReadFile(reqpath)
	if err != nil {
		return nil, fmt.Errorf("failed to read %q; %w", reqpath, err)
	}

	owner, err := identifyUser(reqpath)
	if err != nil {
		return nil, fmt.Errorf("failed to find owner of %q; %w", reqpath, err)
	}

	// Every validation failure below is the requester's fault, so they all
	// share the same status code.
	badRequest := func(cause error) error {
		return newHttpError(http.StatusBadRequest, cause)
	}

	var parsed reindexRequest
	if err := json.Unmarshal(contents, &parsed); err != nil {
		return nil, badRequest(fmt.Errorf("failed to parse JSON from %q; %w", reqpath, err))
	}

	if parsed.Project == nil {
		return nil, badRequest(fmt.Errorf("expected a 'project' property in %q", reqpath))
	}
	if err := isBadName(*parsed.Project); err != nil {
		return nil, badRequest(fmt.Errorf("invalid project name %q; %w", *parsed.Project, err))
	}

	if parsed.Asset == nil {
		return nil, badRequest(fmt.Errorf("expected an 'asset' property in %q", reqpath))
	}
	if err := isBadName(*parsed.Asset); err != nil {
		return nil, badRequest(fmt.Errorf("invalid asset name %q; %w", *parsed.Asset, err))
	}

	if parsed.Version == nil {
		return nil, badRequest(fmt.Errorf("expected a 'version' property in %q", reqpath))
	}
	if err := isBadName(*parsed.Version); err != nil {
		return nil, badRequest(fmt.Errorf("invalid version name %q; %w", *parsed.Version, err))
	}

	parsed.User = owner
	return &parsed, nil
}

// reindexHandler services a reindexing request stored in the file at reqpath.
//
// It validates the request, locks the project directory inside
// globals.Registry, checks that the requesting user is authorized to maintain
// the project, and then calls reindexDirectory on the requested version. If
// the version's summary does not mark it as probational, a "reindex-version"
// log entry is written to the registry.
//
// The asset's latest-version file is deliberately left untouched: reindexing
// does not change any summary file, and the reindexed version is not
// necessarily the most recent one, so overwriting the latest version here
// could point it at an older version.
//
// Errors from reindexPreflight are returned unchanged. An unauthorized user
// yields an HTTP error with status 403 (Forbidden); a missing asset or version
// directory yields status 404 (Not Found). All other failures are returned as
// plain wrapped errors.
func reindexHandler(reqpath string, globals *globalConfiguration) error {
	request, err := reindexPreflight(reqpath)
	if err != nil {
		return err
	}

	// Configuring the project; we apply a lock to the project to avoid concurrent changes.
	project := *(request.Project)
	project_dir := filepath.Join(globals.Registry, project)
	err = globals.Locks.LockDirectory(project_dir, 10*time.Second)
	if err != nil {
		return fmt.Errorf("failed to acquire the lock on %q; %w", project_dir, err)
	}
	defer globals.Locks.Unlock(project_dir)

	// Check if this reindexing request is properly authorized.
	perms, err := readPermissions(project_dir)
	if err != nil {
		return fmt.Errorf("failed to read permissions for %q; %w", project, err)
	}

	if !isAuthorizedToMaintain(request.User, globals.Administrators, perms.Owners) {
		// The user and project names are passed as arguments rather than
		// spliced into the format string, so that a '%' in either of them
		// cannot be misread as a formatting verb.
		return newHttpError(http.StatusForbidden, fmt.Errorf("user '%s' is not authorized to reindex '%s'", request.User, project))
	}

	// Configuring the asset and version.
	asset := *(request.Asset)
	asset_dir := filepath.Join(project_dir, asset)
	if _, err := os.Stat(asset_dir); err != nil {
		return newHttpError(http.StatusNotFound, fmt.Errorf("cannot access asset directory at %q; %w", asset_dir, err))
	}

	version := *(request.Version)
	version_dir := filepath.Join(asset_dir, version)
	if _, err := os.Stat(version_dir); err != nil {
		return newHttpError(http.StatusNotFound, fmt.Errorf("cannot access version directory at %q; %w", version_dir, err))
	}

	err = reindexDirectory(globals.Registry, project, asset, version)
	if err != nil {
		return fmt.Errorf("failed to reindex project; %w", err)
	}

	// The summary is only read, never rewritten; it tells us whether the
	// version is probational.
	summ, err := readSummary(version_dir)
	if err != nil {
		return fmt.Errorf("failed to read the summary file at %q; %w", version_dir, err)
	}

	// Only non-probational versions are announced in the registry logs.
	if summ.OnProbation == nil || !*(summ.OnProbation) {
		log_info := map[string]interface{}{
			"type":    "reindex-version",
			"project": project,
			"asset":   asset,
			"version": version,
		}
		err = dumpLog(globals.Registry, log_info)
		if err != nil {
			return fmt.Errorf("failed to save log file; %w", err)
		}
	}

	return nil
}
Loading

0 comments on commit 334b920

Please sign in to comment.