Skip to content

Commit

Permalink
Allow artifact path to be located outside the sync root (#2128)
Browse files Browse the repository at this point in the history
## Changes

We perform a check during path translation that the path being
referenced is contained in the bundle's sync root. If it isn't, it's not
a valid remote reference. However, this doesn't apply to paths that are
_always_ local, such as the artifact path. An artifact's build command
is executed in its path. Files created by the artifact build (e.g.
wheels or JARs) don't need to be in the sync root because they have a
dedicated and different upload path into `${workspace.artifact_path}`.

Therefore, this check that a path is contained in the bundle's sync root
doesn't apply to artifact paths. This change modifies the structure of
path translation to allow opting out of this check.

Fixes #1927.

## Tests

* Existing and new tests pass.
* Manually confirmed that building and using a wheel built outside the
sync root path works as expected.
* No acceptance tests because we don't run build as part of validate.
  • Loading branch information
pietern authored Jan 14, 2025
1 parent e682eeb commit 5d9bc3b
Show file tree
Hide file tree
Showing 10 changed files with 298 additions and 93 deletions.
203 changes: 135 additions & 68 deletions bundle/config/mutator/translate_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io/fs"
"net/url"
"os"
"path"
"path/filepath"
"strings"
Expand All @@ -17,6 +18,47 @@ import (
"github.com/databricks/cli/libs/notebook"
)

// TranslateMode specifies how a path should be translated.
type TranslateMode int

const (
// TranslateModeNotebook translates a path to a remote notebook.
TranslateModeNotebook TranslateMode = iota

// TranslateModeFile translates a path to a remote regular file.
TranslateModeFile

// TranslateModeDirectory translates a path to a remote directory.
TranslateModeDirectory

// TranslateModeLocalAbsoluteFile translates a path to the local absolute file path.
// It returns an error if the path does not exist or is a directory.
TranslateModeLocalAbsoluteFile

// TranslateModeLocalAbsoluteDirectory translates a path to the local absolute directory path.
// It returns an error if the path does not exist or is not a directory.
TranslateModeLocalAbsoluteDirectory

// TranslateModeLocalRelative translates a path to be relative to the bundle sync root path.
// It does not check if the path exists, nor care if it is a file or directory.
TranslateModeLocalRelative

// TranslateModeLocalRelativeWithPrefix translates a path to be relative to the bundle sync root path.
// It a "./" prefix to the path if it does not already have one.
// This allows for disambiguating between paths and PyPI package names.
TranslateModeLocalRelativeWithPrefix
)

// translateOptions control path translation behavior.
type translateOptions struct {
// Mode specifies how the path should be translated.
Mode TranslateMode

// AllowPathOutsideSyncRoot can be set for paths that are not tied to the sync root path.
// This is the case for artifact paths, for example.
AllowPathOutsideSyncRoot bool
}

type ErrIsNotebook struct {
path string
}
Expand Down Expand Up @@ -44,8 +86,6 @@ func (m *translatePaths) Name() string {
return "TranslatePaths"
}

type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) (string, error)

// translateContext is a context for rewriting paths in a config.
// It is freshly instantiated on every mutator apply call.
// It provides access to the underlying bundle object such that
Expand All @@ -56,74 +96,90 @@ type translateContext struct {
// seen is a map of local paths to their corresponding remote paths.
// If a local path has already been successfully resolved, we do not need to resolve it again.
seen map[string]string

// remoteRoot is the root path of the remote workspace.
// It is equal to ${workspace.file_path} for regular deployments.
// It points to the source root path for source-linked deployments.
remoteRoot string
}

// rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function
//
// It takes these arguments:
// - The argument `dir` is the directory relative to which the given relative path is.
// - The given relative path is both passed and written back through `*p`.
// - The argument `fn` is a function that performs the actual rewriting logic.
// This logic is different between regular files or notebooks.
// - The context in which the function is called.
// - The argument `dir` is the directory relative to which the relative path should be interpreted.
// - The argument `input` is the relative path to rewrite.
// - The argument `opts` is a struct that specifies how the path should be rewritten.
// It contains a `Mode` field that specifies how the path should be rewritten.
//
// The function returns an error if it is impossible to rewrite the given relative path.
// The function returns the rewritten path if successful, or an error if the path could not be rewritten.
// The returned path is an empty string if the path was not rewritten.
func (t *translateContext) rewritePath(
ctx context.Context,
dir string,
p *string,
fn rewriteFunc,
) error {
input string,
opts translateOptions,
) (string, error) {
// We assume absolute paths point to a location in the workspace
if path.IsAbs(*p) {
return nil
if path.IsAbs(input) {
return "", nil
}

url, err := url.Parse(*p)
url, err := url.Parse(input)
if err != nil {
return err
return "", err
}

// If the file path has scheme, it's a full path and we don't need to transform it
if url.Scheme != "" {
return nil
return "", nil
}

// Local path is relative to the directory the resource was defined in.
localPath := filepath.Join(dir, filepath.FromSlash(*p))
localPath := filepath.Join(dir, filepath.FromSlash(input))
if interp, ok := t.seen[localPath]; ok {
*p = interp
return nil
return interp, nil
}

// Local path must be contained in the sync root.
// If it isn't, it won't be synchronized into the workspace.
localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath)
if err != nil {
return err
}
if strings.HasPrefix(localRelPath, "..") {
return fmt.Errorf("path %s is not contained in sync root path", localPath)
return "", err
}

var workspacePath string
if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) {
workspacePath = t.b.SyncRootPath
} else {
workspacePath = t.b.Config.Workspace.FilePath
if !opts.AllowPathOutsideSyncRoot && !filepath.IsLocal(localRelPath) {
return "", fmt.Errorf("path %s is not contained in sync root path", localPath)
}
remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath))

// Convert local path into workspace path via specified function.
interp, err := fn(*p, localPath, localRelPath, remotePath)
var interp string
switch opts.Mode {
case TranslateModeNotebook:
interp, err = t.translateNotebookPath(ctx, input, localPath, localRelPath)
case TranslateModeFile:
interp, err = t.translateFilePath(ctx, input, localPath, localRelPath)
case TranslateModeDirectory:
interp, err = t.translateDirectoryPath(ctx, input, localPath, localRelPath)
case TranslateModeLocalAbsoluteFile:
interp, err = t.translateLocalAbsoluteFilePath(ctx, input, localPath, localRelPath)
case TranslateModeLocalAbsoluteDirectory:
interp, err = t.translateLocalAbsoluteDirectoryPath(ctx, input, localPath, localRelPath)
case TranslateModeLocalRelative:
interp, err = t.translateLocalRelativePath(ctx, input, localPath, localRelPath)
case TranslateModeLocalRelativeWithPrefix:
interp, err = t.translateLocalRelativeWithPrefixPath(ctx, input, localPath, localRelPath)
default:
return "", fmt.Errorf("unsupported translate mode: %d", opts.Mode)
}
if err != nil {
return err
return "", err
}

*p = interp
t.seen[localPath] = interp
return nil
return interp, nil
}

func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) {
func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath))
if errors.Is(err, fs.ErrNotExist) {
if filepath.Ext(localFullPath) != notebook.ExtensionNone {
Expand Down Expand Up @@ -162,10 +218,11 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex
}

// Upon import, notebooks are stripped of their extension.
return strings.TrimSuffix(remotePath, filepath.Ext(localFullPath)), nil
localRelPathNoExt := strings.TrimSuffix(localRelPath, filepath.Ext(localRelPath))
return path.Join(t.remoteRoot, filepath.ToSlash(localRelPathNoExt)), nil
}

func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) {
func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath))
if errors.Is(err, fs.ErrNotExist) {
return "", fmt.Errorf("file %s not found", literal)
Expand All @@ -176,25 +233,21 @@ func (t *translateContext) translateFilePath(literal, localFullPath, localRelPat
if nb {
return "", ErrIsNotebook{localFullPath}
}
return remotePath, nil
return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil
}

func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) {
func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath))
if err != nil {
return "", err
}
if !info.IsDir() {
return "", fmt.Errorf("%s is not a directory", localFullPath)
}
return remotePath, nil
}

func (t *translateContext) translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) {
return localRelPath, nil
return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil
}

func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) {
func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath))
if errors.Is(err, fs.ErrNotExist) {
return "", fmt.Errorf("file %s not found", literal)
Expand All @@ -208,16 +261,33 @@ func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, l
return localFullPath, nil
}

func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) {
func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) {
info, err := os.Stat(localFullPath)
if errors.Is(err, fs.ErrNotExist) {
return "", fmt.Errorf("directory %s not found", literal)
}
if err != nil {
return "", fmt.Errorf("unable to determine if %s is a directory: %w", localFullPath, err)
}
if !info.IsDir() {
return "", fmt.Errorf("expected %s to be a directory but found a file", literal)
}
return localFullPath, nil
}

func (t *translateContext) translateLocalRelativePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
return localRelPath, nil
}

func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) {
if !strings.HasPrefix(localRelPath, ".") {
localRelPath = "." + string(filepath.Separator) + localRelPath
}
return localRelPath, nil
}

func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) {
out := v.MustString()
err := t.rewritePath(dir, &out, fn)
func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.Value, dir string, opts translateOptions) (dyn.Value, error) {
out, err := t.rewritePath(ctx, dir, v.MustString(), opts)
if err != nil {
if target := (&ErrIsNotebook{}); errors.As(err, target) {
return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target)
Expand All @@ -228,43 +298,38 @@ func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc,
return dyn.InvalidValue, err
}

return dyn.NewValue(out, v.Locations()), nil
}

func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) {
nv, err := t.rewriteValue(p, v, fn, dir)
if err == nil {
return nv, nil
}

// If we failed to rewrite the path, try to rewrite it relative to the fallback directory.
if fallback != "" {
nv, nerr := t.rewriteValue(p, v, fn, fallback)
if nerr == nil {
// TODO: Emit a warning that this path should be rewritten.
return nv, nil
}
// If the path was not rewritten, return the original value.
if out == "" {
return v, nil
}

return dyn.InvalidValue, err
return dyn.NewValue(out, v.Locations()), nil
}

func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
func (m *translatePaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
t := &translateContext{
b: b,
seen: make(map[string]string),
}

// Set the remote root to the sync root if source-linked deployment is enabled.
// Otherwise, set it to the workspace file path.
if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) {
t.remoteRoot = t.b.SyncRootPath
} else {
t.remoteRoot = t.b.Config.Workspace.FilePath
}

err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
var err error
for _, fn := range []func(dyn.Value) (dyn.Value, error){
for _, fn := range []func(context.Context, dyn.Value) (dyn.Value, error){
t.applyJobTranslations,
t.applyPipelineTranslations,
t.applyArtifactTranslations,
t.applyDashboardTranslations,
t.applyAppsTranslations,
} {
v, err = fn(v)
v, err = fn(ctx, v)
if err != nil {
return dyn.InvalidValue, err
}
Expand All @@ -275,6 +340,8 @@ func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos
return diag.FromErr(err)
}

// gatherFallbackPaths collects the fallback paths for relative paths in the configuration.
// Read more about the motivation for this functionality in the "fallback" path translation tests.
func gatherFallbackPaths(v dyn.Value, typ string) (map[string]string, error) {
fallback := make(map[string]string)
pattern := dyn.NewPattern(dyn.Key("resources"), dyn.Key(typ), dyn.AnyKey())
Expand Down
9 changes: 7 additions & 2 deletions bundle/config/mutator/translate_paths_apps.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
package mutator

import (
"context"
"fmt"

"github.com/databricks/cli/libs/dyn"
)

func (t *translateContext) applyAppsTranslations(v dyn.Value) (dyn.Value, error) {
func (t *translateContext) applyAppsTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) {
// Convert the `source_code_path` field to a remote absolute path.
// We use this path for app deployment to point to the source code.
pattern := dyn.NewPattern(
Expand All @@ -16,13 +17,17 @@ func (t *translateContext) applyAppsTranslations(v dyn.Value) (dyn.Value, error)
dyn.Key("source_code_path"),
)

opts := translateOptions{
Mode: TranslateModeDirectory,
}

return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
key := p[2].Key()
dir, err := v.Location().Directory()
if err != nil {
return dyn.InvalidValue, fmt.Errorf("unable to determine directory for app %s: %w", key, err)
}

return t.rewriteRelativeTo(p, v, t.translateDirectoryPath, dir, "")
return t.rewriteValue(ctx, p, v, dir, opts)
})
}
Loading

0 comments on commit 5d9bc3b

Please sign in to comment.