Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DeleteVolumeGroupSnapshot API - part 2 #952

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion client/hack/update-crd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ then
TMP_DIR=$(mktemp -d);
cd $TMP_DIR;
go mod init tmp;
go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.11.3;
go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.12.0;
rm -rf $TMP_DIR;
CONTROLLER_GEN=$(which controller-gen)
fi
Expand Down
18 changes: 7 additions & 11 deletions cmd/csi-snapshotter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,13 @@ var (
kubeAPIQPS = flag.Float64("kube-api-qps", 5, "QPS to use while communicating with the kubernetes apiserver. Defaults to 5.0.")
kubeAPIBurst = flag.Int("kube-api-burst", 10, "Burst to use while communicating with the kubernetes apiserver. Defaults to 10.")

metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableNodeDeployment = flag.Bool("node-deployment", false, "Enables deploying the sidecar controller together with a CSI driver on nodes to manage snapshots for node-local volumes.")
// TODO(xing-yang): Enable enableVolumeGroupSnapshots when the feature is fully implemented
// enableVolumeGroupSnapshots = flag.Bool("enable-volume-group-snapshots", false, "Enables the volume group snapshot feature, allowing the user to create snapshots of groups of volumes.")
metricsAddress = flag.String("metrics-address", "", "(deprecated) The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string, which means the server is disabled. Only one of `--metrics-address` and `--http-endpoint` can be set.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
retryIntervalStart = flag.Duration("retry-interval-start", time.Second, "Initial retry interval of failed volume snapshot creation or deletion. It doubles with each failure, up to retry-interval-max. Default is 1 second.")
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableNodeDeployment = flag.Bool("node-deployment", false, "Enables deploying the sidecar controller together with a CSI driver on nodes to manage snapshots for node-local volumes.")
enableVolumeGroupSnapshots = flag.Bool("enable-volume-group-snapshots", false, "Enables the volume group snapshot feature, allowing the user to create a snapshot of a group of volumes.")

groupSnapshotNamePrefix = flag.String("groupsnapshot-name-prefix", "groupsnapshot", "Prefix to apply to the name of a created group snapshot")
groupSnapshotNameUUIDLength = flag.Int("groupsnapshot-name-uuid-length", -1, "Length in characters for the generated uuid of a created group snapshot. Defaults behavior is to NOT truncate.")
Expand Down Expand Up @@ -228,9 +227,6 @@ func main() {

snapShotter := snapshotter.NewSnapshotter(csiConn)
var groupSnapshotter group_snapshotter.GroupSnapshotter
// TODO(xing-yang): Remove the following line when the enableVolumeGroupSnapshots feature is enabled
bEnable := false
enableVolumeGroupSnapshots := &bEnable
if *enableVolumeGroupSnapshots {
supportsCreateVolumeGroupSnapshot, err := supportsGroupControllerCreateVolumeGroupSnapshot(ctx, csiConn)
if err != nil {
Expand Down
6 changes: 1 addition & 5 deletions cmd/snapshot-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ var (
retryIntervalMax = flag.Duration("retry-interval-max", 5*time.Minute, "Maximum retry interval of failed volume snapshot creation or deletion. Default is 5 minutes.")
enableDistributedSnapshotting = flag.Bool("enable-distributed-snapshotting", false, "Enables each node to handle snapshotting for the local volumes created on that node")
preventVolumeModeConversion = flag.Bool("prevent-volume-mode-conversion", false, "Prevents an unauthorised user from modifying the volume mode when creating a PVC from an existing VolumeSnapshot.")
// TODO(xing-yang): Enable enableVolumeGroupSnapshots when the feature is fully implemented
// enableVolumeGroupSnapshots = flag.Bool("enable-volume-group-snapshots", false, "Enables the volume group snapshot feature, allowing the user to create snapshots of groups of volumes.")
enableVolumeGroupSnapshots = flag.Bool("enable-volume-group-snapshots", false, "Enables the volume group snapshot feature, allowing the user to create a snapshot of a group of volumes.")

retryCRDIntervalMax = flag.Duration("retry-crd-interval-max", 5*time.Second, "Maximum retry interval to wait for CRDs to appear. The default is 5 seconds.")
)
Expand Down Expand Up @@ -208,9 +207,6 @@ func main() {

klog.V(2).Infof("Start NewCSISnapshotController with kubeconfig [%s] resyncPeriod [%+v]", *kubeconfig, *resyncPeriod)

// TODO(xing-yang): Remove the following lines when the enableVolumeGroupSnapshots feature is enabled
bEnable := false
enableVolumeGroupSnapshots := &bEnable
ctrl := controller.NewCSISnapshotCommonController(
snapClient,
kubeClient,
Expand Down
41 changes: 23 additions & 18 deletions pkg/common-controller/groupsnapshot_controller_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -1103,16 +1103,7 @@ func (ctrl *csiSnapshotCommonController) processGroupSnapshotWithDeletionTimesta
groupSnapshotContent = nil
}

klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: delete group snapshot content and remove finalizer from group snapshot if needed", utils.GroupSnapshotKey(groupSnapshot))

return ctrl.checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent(groupSnapshot, groupSnapshotContent, deleteGroupSnapshotContent)
}

// checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent deletes
// the group snapshot content and removes group snapshot finalizers if needed
func (ctrl *csiSnapshotCommonController) checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent(groupSnapshot *crdv1alpha1.VolumeGroupSnapshot, groupSnapshotContent *crdv1alpha1.VolumeGroupSnapshotContent, deleteGroupSnapshotContent bool) error {
klog.V(5).Infof("checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent VolumeGroupSnapshot[%s]: %s", utils.GroupSnapshotKey(groupSnapshot), utils.GetGroupSnapshotStatusForLogging(groupSnapshot))

klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: check if group snapshot is a candidate for deletion", utils.GroupSnapshotKey(groupSnapshot))
if !utils.IsGroupSnapshotDeletionCandidate(groupSnapshot) {
return nil
}
Expand All @@ -1135,33 +1126,47 @@ func (ctrl *csiSnapshotCommonController) checkandRemoveGroupSnapshotFinalizersAn

}

// regardless of the deletion policy, set the VolumeSnapshotBeingDeleted on
// content object, this is to allow snapshotter sidecar controller to conduct
// a delete operation whenever the content has deletion timestamp set.
// regardless of the deletion policy, set VolumeGroupSnapshotBeingDeleted on
// group snapshot content object, this is to allow snapshotter sidecar controller
// to conduct a delete operation whenever the group snapshot content has deletion
// timestamp set.
if groupSnapshotContent != nil {
klog.V(5).Infof("checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent[%s]: Set VolumeGroupSnapshotBeingDeleted annotation on the group snapshot content [%s]", utils.GroupSnapshotKey(groupSnapshot), groupSnapshotContent.Name)
klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: Set VolumeGroupSnapshotBeingDeleted annotation on the group snapshot content [%s]", utils.GroupSnapshotKey(groupSnapshot), groupSnapshotContent.Name)
updatedGroupSnapshotContent, err := ctrl.setAnnVolumeGroupSnapshotBeingDeleted(groupSnapshotContent)
if err != nil {
klog.V(4).Infof("checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent[%s]: failed to set VolumeGroupSnapshotBeingDeleted annotation on the group snapshot content [%s]", utils.GroupSnapshotKey(groupSnapshot), groupSnapshotContent.Name)
klog.V(4).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: failed to set VolumeGroupSnapshotBeingDeleted annotation on the group snapshot content [%s]: %v", utils.GroupSnapshotKey(groupSnapshot), groupSnapshotContent.Name, err)
return err
}
groupSnapshotContent = updatedGroupSnapshotContent
}

// VolumeGroupSnapshot should be deleted. Check and remove finalizers
// If group snapshot content exists and has a deletion policy of Delete, set
// DeletionTimeStamp on the content;
// DeletionTimeStamp on the group snapshot content;
// VolumeGroupSnapshotContent won't be deleted immediately due to the VolumeGroupSnapshotContentFinalizer
if groupSnapshotContent != nil && deleteGroupSnapshotContent {
klog.V(5).Infof("checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent: set DeletionTimeStamp on group snapshot content [%s].", groupSnapshotContent.Name)
klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: set DeletionTimeStamp on group snapshot content [%s].", utils.GroupSnapshotKey(groupSnapshot), groupSnapshotContent.Name)
err := ctrl.clientset.GroupsnapshotV1alpha1().VolumeGroupSnapshotContents().Delete(context.TODO(), groupSnapshotContent.Name, metav1.DeleteOptions{})
if err != nil {
ctrl.eventRecorder.Event(groupSnapshot, v1.EventTypeWarning, "GroupSnapshotContentObjectDeleteError", "Failed to delete group snapshot content API object")
return fmt.Errorf("failed to delete VolumeGroupSnapshotContent %s from API server: %q", groupSnapshotContent.Name, err)
}
}

klog.V(5).Infof("checkandRemoveGroupSnapshotFinalizersAndCheckandDeleteGroupSnapshotContent: Remove Finalizer for VolumeGroupSnapshot[%s]", utils.GroupSnapshotKey(groupSnapshot))
klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s]: Delete individual snapshots that are part of the group snapshot", utils.GroupSnapshotKey(groupSnapshot))

// Delete the individual snapshots part of the group snapshot
for _, snapshot := range groupSnapshot.Status.VolumeSnapshotRefList {
err := ctrl.clientset.SnapshotV1().VolumeSnapshots(snapshot.Namespace).Delete(context.TODO(), snapshot.Name, metav1.DeleteOptions{})
if err != nil {
msg := fmt.Sprintf("failed to delete snapshot API object %s/%s part of group snapshot %s: %v", snapshot.Namespace, snapshot.Name, utils.GroupSnapshotKey(groupSnapshot), err)
klog.Error(msg)
ctrl.eventRecorder.Event(groupSnapshot, v1.EventTypeWarning, "SnapshotDeleteError", msg)
return fmt.Errorf(msg)
}
}

klog.V(5).Infof("processGroupSnapshotWithDeletionTimestamp[%s] : Remove Finalizer for VolumeGroupSnapshot", utils.GroupSnapshotKey(groupSnapshot))
// remove VolumeSnapshotBoundFinalizer on the VolumeGroupSnapshot object:
// a. If there is no group snapshot content found, remove the finalizer.
// b. If the group snapshot content is being deleted, i.e., with deleteGroupSnapshotContent == true,
Expand Down
43 changes: 43 additions & 0 deletions pkg/common-controller/snapshot_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,21 @@ func (ctrl *csiSnapshotCommonController) processSnapshotWithDeletionTimestamp(sn
content = nil
}

// Block deletion if this snapshot belongs to a group snapshot.
if snapshot.Status != nil && snapshot.Status.VolumeGroupSnapshotName != nil {
groupSnapshot, err := ctrl.groupSnapshotLister.VolumeGroupSnapshots(snapshot.Namespace).Get(*snapshot.Status.VolumeGroupSnapshotName)
if err == nil {
msg := fmt.Sprintf("deletion of the individual volume snapshot %s is not allowed as it belongs to group snapshot %s. Deleting the group snapshot will trigger the deletion of all the individual volume snapshots that are part of the group.", utils.SnapshotKey(snapshot), utils.GroupSnapshotKey(groupSnapshot))
klog.Error(msg)
ctrl.eventRecorder.Event(snapshot, v1.EventTypeWarning, "SnapshotDeleteError", msg)
return fmt.Errorf(msg)
Copy link
Collaborator

@xing-yang xing-yang Nov 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we either need to add a webhook to prevent the deletion from happening in the first place or add a finalizer. I'm fine to add that in a followup PR. Can you add a note in your PR description?

failed to delete snapshot API %s object as it belongs to group snapshot %s -> deletion of the individual volume snapshot %s is not allowed as it belongs to group snapshot %s. Delete the group snapshot will trigger the deletion of all the individual volume snapshots that are part of the group.

Also add an error msg.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Amongst the other changes I had a tough time testing out the finaliser code, so I removed it. I can address that in a follow up PR. We definitely need some logic in case the web hook is not deployed. I'll change the PR description.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. Yes, finalizer is better as the webhook may not be deployed.

}
if !apierrs.IsNotFound(err) {
klog.Errorf("failed to delete snapshot %s: %v", utils.SnapshotKey(snapshot), err)
return err
RaunakShah marked this conversation as resolved.
Show resolved Hide resolved
}
}

klog.V(5).Infof("processSnapshotWithDeletionTimestamp[%s]: delete snapshot content and remove finalizer from snapshot if needed", utils.SnapshotKey(snapshot))

return ctrl.checkandRemoveSnapshotFinalizersAndCheckandDeleteContent(snapshot, content, deleteContent)
Expand Down Expand Up @@ -1139,6 +1154,27 @@ func (ctrl *csiSnapshotCommonController) updateSnapshotStatus(snapshot *crdv1.Vo
volumeSnapshotErr = content.Status.Error.DeepCopy()
}

var groupSnapshotName string
if content.Status != nil && content.Status.VolumeGroupSnapshotHandle != nil {
// If this snapshot belongs to a group snapshot, find the group snapshot
// name from the group snapshot content
groupSnapshotContentList, err := ctrl.groupSnapshotContentLister.List(labels.Everything())
if err != nil {
return nil, err
}
found := false
for _, groupSnapshotContent := range groupSnapshotContentList {
if groupSnapshotContent.Status != nil && groupSnapshotContent.Status.VolumeGroupSnapshotHandle != nil && *groupSnapshotContent.Status.VolumeGroupSnapshotHandle == *content.Status.VolumeGroupSnapshotHandle {
groupSnapshotName = groupSnapshotContent.Spec.VolumeGroupSnapshotRef.Name
found = true
break
}
}
if !found {
return nil, fmt.Errorf("updateSnapshotStatus: cannot find the group snapshot for VolumeSnapshot [%s], will not update snapshot status", utils.SnapshotKey(snapshot))
}
}

klog.V(5).Infof("updateSnapshotStatus: updating VolumeSnapshot [%+v] based on VolumeSnapshotContentStatus [%+v]", snapshot, content.Status)

snapshotObj, err := ctrl.clientset.SnapshotV1().VolumeSnapshots(snapshot.Namespace).Get(context.TODO(), snapshot.Name, metav1.GetOptions{})
Expand All @@ -1162,6 +1198,9 @@ func (ctrl *csiSnapshotCommonController) updateSnapshotStatus(snapshot *crdv1.Vo
if volumeSnapshotErr != nil {
newStatus.Error = volumeSnapshotErr
}
if groupSnapshotName != "" {
newStatus.VolumeGroupSnapshotName = &groupSnapshotName
}
updated = true
} else {
newStatus = snapshotObj.Status.DeepCopy()
Expand All @@ -1188,6 +1227,10 @@ func (ctrl *csiSnapshotCommonController) updateSnapshotStatus(snapshot *crdv1.Vo
newStatus.Error = volumeSnapshotErr
updated = true
}
if newStatus.VolumeGroupSnapshotName == nil && groupSnapshotName != "" {
newStatus.VolumeGroupSnapshotName = &groupSnapshotName
updated = true
}
}

if updated {
Expand Down
6 changes: 3 additions & 3 deletions pkg/sidecar-controller/content_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TestSyncContent(t *testing.T) {
readyToUse: true,
},
},
expectedListCalls: []listCall{{"sid1-1", map[string]string{}, true, time.Now(), 1, nil}},
expectedListCalls: []listCall{{"sid1-1", map[string]string{}, true, time.Now(), 1, nil, ""}},
expectSuccess: true,
errors: noerrors,
test: testSyncContent,
Expand Down Expand Up @@ -78,7 +78,7 @@ func TestSyncContent(t *testing.T) {
size: defaultSize,
},
},
expectedListCalls: []listCall{{"sid1-2", map[string]string{}, true, time.Now(), 1, nil}},
expectedListCalls: []listCall{{"sid1-2", map[string]string{}, true, time.Now(), 1, nil, ""}},
expectSuccess: true,
errors: noerrors,
test: testSyncContent,
Expand Down Expand Up @@ -194,7 +194,7 @@ func TestSyncContent(t *testing.T) {
readyToUse: true,
},
},
expectedListCalls: []listCall{{"sid1-6", map[string]string{}, true, time.Now(), 1, nil}},
expectedListCalls: []listCall{{"sid1-6", map[string]string{}, true, time.Now(), 1, nil, ""}},
errors: noerrors,
test: testSyncContent,
},
Expand Down
12 changes: 6 additions & 6 deletions pkg/sidecar-controller/csi_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
type Handler interface {
CreateSnapshot(content *crdv1.VolumeSnapshotContent, parameters map[string]string, snapshotterCredentials map[string]string) (string, string, time.Time, int64, bool, error)
DeleteSnapshot(content *crdv1.VolumeSnapshotContent, snapshotterCredentials map[string]string) error
GetSnapshotStatus(content *crdv1.VolumeSnapshotContent, snapshotterListCredentials map[string]string) (bool, time.Time, int64, error)
GetSnapshotStatus(content *crdv1.VolumeSnapshotContent, snapshotterListCredentials map[string]string) (bool, time.Time, int64, string, error)
CreateGroupSnapshot(content *crdv1alpha1.VolumeGroupSnapshotContent, volumeIDs []string, parameters map[string]string, snapshotterCredentials map[string]string) (string, string, []*csi.Snapshot, time.Time, bool, error)
GetGroupSnapshotStatus(content *crdv1alpha1.VolumeGroupSnapshotContent, snapshotterListCredentials map[string]string) (bool, time.Time, error)
DeleteGroupSnapshot(content *crdv1alpha1.VolumeGroupSnapshotContent, SnapshotID []string, snapshotterCredentials map[string]string) error
Expand Down Expand Up @@ -113,7 +113,7 @@ func (handler *csiHandler) DeleteSnapshot(content *crdv1.VolumeSnapshotContent,
return nil
}

func (handler *csiHandler) GetSnapshotStatus(content *crdv1.VolumeSnapshotContent, snapshotterListCredentials map[string]string) (bool, time.Time, int64, error) {
func (handler *csiHandler) GetSnapshotStatus(content *crdv1.VolumeSnapshotContent, snapshotterListCredentials map[string]string) (bool, time.Time, int64, string, error) {
ctx, cancel := context.WithTimeout(context.Background(), handler.timeout)
defer cancel()

Expand All @@ -124,15 +124,15 @@ func (handler *csiHandler) GetSnapshotStatus(content *crdv1.VolumeSnapshotConten
} else if content.Spec.Source.SnapshotHandle != nil {
snapshotHandle = *content.Spec.Source.SnapshotHandle
} else {
return false, time.Time{}, 0, fmt.Errorf("failed to list snapshot for content %s: snapshotHandle is missing", content.Name)
return false, time.Time{}, 0, "", fmt.Errorf("failed to list snapshot for content %s: snapshotHandle is missing", content.Name)
}

csiSnapshotStatus, timestamp, size, err := handler.snapshotter.GetSnapshotStatus(ctx, snapshotHandle, snapshotterListCredentials)
csiSnapshotStatus, timestamp, size, groupSnapshotID, err := handler.snapshotter.GetSnapshotStatus(ctx, snapshotHandle, snapshotterListCredentials)
if err != nil {
return false, time.Time{}, 0, fmt.Errorf("failed to list snapshot for content %s: %q", content.Name, err)
return false, time.Time{}, 0, "", fmt.Errorf("failed to list snapshot for content %s: %q", content.Name, err)
}

return csiSnapshotStatus, timestamp, size, nil
return csiSnapshotStatus, timestamp, size, groupSnapshotID, nil
}

func makeSnapshotName(prefix, snapshotUID string, snapshotNameUUIDLength int) (string, error) {
Expand Down
Loading