From 28886a760b35333c5d3ad7876420cdbdcb0dc771 Mon Sep 17 00:00:00 2001 From: Will Dey Date: Wed, 4 Sep 2024 15:35:21 -0400 Subject: [PATCH 1/9] Fix resurrection by hanging ref and deletion by invalidated ancestry --- .../cassandra/db/ColumnFamilyStore.java | 52 ++++++++++++------ .../sstable/metadata/MetadataCollector.java | 1 + .../sstable/metadata/MetadataSerializer.java | 2 +- .../io/sstable/metadata/MetadataType.java | 3 +- .../metadata/ValidAncestorsMetadata.java | 55 +++++++++++++++++++ 5 files changed, 95 insertions(+), 18 deletions(-) create mode 100644 src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 084f9f56d8..9e56bfc1c6 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -49,6 +49,7 @@ import org.apache.cassandra.db.lifecycle.Tracker; import org.apache.cassandra.db.lifecycle.LifecycleTransaction; import org.apache.cassandra.io.FSWriteError; +import org.apache.cassandra.io.sstable.metadata.MetadataComponent; import org.json.simple.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -699,8 +700,15 @@ public static void removeUnfinishedCompactionLeftovers(CFMetaData metadata, Map< Set ancestors; try { - CompactionMetadata compactionMetadata = (CompactionMetadata) desc.getMetadataSerializer().deserialize(desc, MetadataType.COMPACTION); - ancestors = compactionMetadata.ancestors; + Map compactionMetadata = desc.getMetadataSerializer().deserialize(desc, EnumSet.of(MetadataType.COMPACTION, MetadataType.VALID_ANCESTORS)); + if (compactionMetadata.get(MetadataType.VALID_ANCESTORS) != null) + { + ancestors = ((CompactionMetadata) compactionMetadata.get(MetadataType.COMPACTION)).ancestors; + } + else + { + ancestors = Collections.emptySet(); + } } catch (IOException e) { @@ -833,6 +841,7 @@ public synchronized int loadNewSSTablesWithCount(boolean assumeCfIsEmpty) for (Map.Entry> entry : lister.list().entrySet()) { Descriptor descriptor = entry.getKey(); + Set components = entry.getValue(); if (currentDescriptors.contains(descriptor)) continue; // old (initialized) SSTable found, skipping @@ -856,28 +865,39 @@ public synchronized int loadNewSSTablesWithCount(boolean assumeCfIsEmpty) continue; } - // Increment the generation until we find a filename that doesn't exist. This is needed because the new - // SSTables that are being loaded might already use these generation numbers. Descriptor newDescriptor; - do + if (assumeCfIsEmpty) { - newDescriptor = new Descriptor(descriptor.version, - descriptor.directory, - descriptor.ksname, - descriptor.cfname, - fileIndexGenerator.incrementAndGet(), - Descriptor.Type.FINAL, - descriptor.formatType); + newDescriptor = descriptor; } - while (new File(newDescriptor.filenameFor(Component.DATA)).exists()); + else + { + // Increment the generation until we find a filename that doesn't exist. This is needed because the new + // SSTables that are being loaded might already use these generation numbers. + do + { + newDescriptor = new Descriptor(descriptor.version, + descriptor.directory, + descriptor.ksname, + descriptor.cfname, + fileIndexGenerator.incrementAndGet(), + Descriptor.Type.FINAL, + descriptor.formatType); + } + while (new File(newDescriptor.filenameFor(Component.DATA)).exists()); - logger.info("Renaming new SSTable {} to {}", descriptor, newDescriptor); - SSTableWriter.rename(descriptor, newDescriptor, entry.getValue()); + logger.info("Renaming new SSTable {} to {}", descriptor, newDescriptor); + SSTableWriter.rename(descriptor, newDescriptor, components); + + logger.info("Removing Statistics.db for new SSTable {} to clear old ancestor metadata", newDescriptor); + FileUtils.delete(new File(newDescriptor.filenameFor(Component.STATS))); + components = Sets.difference(components, ImmutableSet.of(Component.STATS)); + } SSTableReader reader; try { - reader = SSTableReader.open(newDescriptor, entry.getValue(), metadata, partitioner); + reader = SSTableReader.open(newDescriptor, components, metadata, partitioner); } catch (IOException e) { diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java index 579ff7a02d..aaa86dbabf 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java @@ -295,6 +295,7 @@ public Map finalizeMetadata(String partitioner, hasLegacyCounterShards, repairedAt)); components.put(MetadataType.COMPACTION, new CompactionMetadata(ancestors, cardinality)); + components.put(MetadataType.VALID_ANCESTORS, new ValidAncestorsMetadata()); return components; } } diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataSerializer.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataSerializer.java index 6120bafc4b..031df92e50 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataSerializer.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataSerializer.java @@ -122,8 +122,8 @@ public Map deserialize(Descriptor descriptor, F { in.seek(offset); component = type.serializer.deserialize(descriptor.version, in); + components.put(type, component); } - components.put(type, component); } return components; } diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java index 9717da1591..5a1c09da1e 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java @@ -27,7 +27,8 @@ public enum MetadataType /** Metadata only used at compaction */ COMPACTION(CompactionMetadata.serializer), /** Metadata always keep in memory */ - STATS(StatsMetadata.serializer); + STATS(StatsMetadata.serializer), + VALID_ANCESTORS(ValidAncestorsMetadata.serializer); public final IMetadataComponentSerializer serializer; diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java new file mode 100644 index 0000000000..2bb5d796bf --- /dev/null +++ b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.io.sstable.metadata; + +import java.io.DataInput; + +import org.apache.cassandra.io.sstable.format.Version; +import org.apache.cassandra.io.util.DataOutputPlus; + +/** + * Marker metadata component indicating this SSTable has valid compaction ancestor information. + */ +public class ValidAncestorsMetadata extends MetadataComponent +{ + public static final IMetadataComponentSerializer serializer = new ValidAncestorsMetadataSerializer(); + + public MetadataType getType() + { + return MetadataType.VALID_ANCESTORS; + } + + public static class ValidAncestorsMetadataSerializer implements IMetadataComponentSerializer + { + @Override + public int serializedSize(ValidAncestorsMetadata component, Version version) + { + return 0; + } + + @Override + public void serialize(ValidAncestorsMetadata component, Version version, DataOutputPlus out) {} + + @Override + public ValidAncestorsMetadata deserialize(Version version, DataInput in) + { + return new ValidAncestorsMetadata(); + } + } +} From e6676480601ff4b2fe60ee91abcf081bbe42c688 Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 00:54:12 -0400 Subject: [PATCH 2/9] equals and hashcode --- .../io/sstable/metadata/ValidAncestorsMetadata.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java index 2bb5d796bf..1dca291713 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java @@ -35,6 +35,18 @@ public MetadataType getType() return MetadataType.VALID_ANCESTORS; } + @Override + public boolean equals(Object obj) + { + return obj instanceof ValidAncestorsMetadata; + } + + @Override + public int hashCode() + { + return 0; + } + public static class ValidAncestorsMetadataSerializer implements IMetadataComponentSerializer { @Override From 25cb71e2deb57d34b2908cdcaa011cad5abf888b Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 00:58:57 -0400 Subject: [PATCH 3/9] actually valid use case for a singleton --- .../io/sstable/metadata/MetadataCollector.java | 2 +- .../sstable/metadata/ValidAncestorsMetadata.java | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java index aaa86dbabf..88afa3b5ba 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java @@ -295,7 +295,7 @@ public Map finalizeMetadata(String partitioner, hasLegacyCounterShards, repairedAt)); components.put(MetadataType.COMPACTION, new CompactionMetadata(ancestors, cardinality)); - components.put(MetadataType.VALID_ANCESTORS, new ValidAncestorsMetadata()); + components.put(MetadataType.VALID_ANCESTORS, ValidAncestorsMetadata.instance); return components; } } diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java index 1dca291713..a64a9dcc6e 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java @@ -29,23 +29,14 @@ public class ValidAncestorsMetadata extends MetadataComponent { public static final IMetadataComponentSerializer serializer = new ValidAncestorsMetadataSerializer(); + public static final ValidAncestorsMetadata instance = new ValidAncestorsMetadata(); public MetadataType getType() { return MetadataType.VALID_ANCESTORS; } - @Override - public boolean equals(Object obj) - { - return obj instanceof ValidAncestorsMetadata; - } - - @Override - public int hashCode() - { - return 0; - } + private ValidAncestorsMetadata() {} public static class ValidAncestorsMetadataSerializer implements IMetadataComponentSerializer { @@ -61,7 +52,7 @@ public void serialize(ValidAncestorsMetadata component, Version version, DataOut @Override public ValidAncestorsMetadata deserialize(Version version, DataInput in) { - return new ValidAncestorsMetadata(); + return instance; } } } From 00951d7b6bde160fbd27d171be1af41c283cbfe4 Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 00:59:51 -0400 Subject: [PATCH 4/9] ordering --- .../cassandra/io/sstable/metadata/ValidAncestorsMetadata.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java index a64a9dcc6e..69092d3f64 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/ValidAncestorsMetadata.java @@ -31,13 +31,13 @@ public class ValidAncestorsMetadata extends MetadataComponent public static final IMetadataComponentSerializer serializer = new ValidAncestorsMetadataSerializer(); public static final ValidAncestorsMetadata instance = new ValidAncestorsMetadata(); + private ValidAncestorsMetadata() {} + public MetadataType getType() { return MetadataType.VALID_ANCESTORS; } - private ValidAncestorsMetadata() {} - public static class ValidAncestorsMetadataSerializer implements IMetadataComponentSerializer { @Override From fa2e482f3c5f7c7ca8d78da965f4f2fddbf65dee Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 01:04:10 -0400 Subject: [PATCH 5/9] delete stats before rename in case of crash --- src/java/org/apache/cassandra/db/ColumnFamilyStore.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 9e56bfc1c6..437c01b0db 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -886,12 +886,12 @@ public synchronized int loadNewSSTablesWithCount(boolean assumeCfIsEmpty) } while (new File(newDescriptor.filenameFor(Component.DATA)).exists()); + logger.info("Removing Statistics.db for new SSTable {} to clear old ancestor metadata", descriptor); + FileUtils.delete(new File(descriptor.filenameFor(Component.STATS))); + components = Sets.difference(components, ImmutableSet.of(Component.STATS)); + logger.info("Renaming new SSTable {} to {}", descriptor, newDescriptor); SSTableWriter.rename(descriptor, newDescriptor, components); - - logger.info("Removing Statistics.db for new SSTable {} to clear old ancestor metadata", newDescriptor); - FileUtils.delete(new File(newDescriptor.filenameFor(Component.STATS))); - components = Sets.difference(components, ImmutableSet.of(Component.STATS)); } SSTableReader reader; From b6eaabd49e37425ee8f323832835a88c965b6e23 Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 01:12:31 -0400 Subject: [PATCH 6/9] no longer have persistent stats on clearAndLoad --- .../io/sstable/format/SSTableReaderTest.java | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/test/unit/org/apache/cassandra/io/sstable/format/SSTableReaderTest.java b/test/unit/org/apache/cassandra/io/sstable/format/SSTableReaderTest.java index 6d07f1c3ed..d3d3ba532a 100644 --- a/test/unit/org/apache/cassandra/io/sstable/format/SSTableReaderTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/format/SSTableReaderTest.java @@ -196,26 +196,6 @@ public void testSpannedIndexPositions() throws IOException } } - @Test - public void testPersistentStatistics() - { - - Keyspace keyspace = Keyspace.open(KEYSPACE1); - ColumnFamilyStore store = keyspace.getColumnFamilyStore("Standard1"); - - for (int j = 0; j < 100; j += 2) - { - ByteBuffer key = ByteBufferUtil.bytes(String.valueOf(j)); - Mutation rm = new Mutation(KEYSPACE1, key); - rm.add("Standard1", cellname("0"), ByteBufferUtil.EMPTY_BYTE_BUFFER, j); - rm.applyUnsafe(); - } - store.forceBlockingFlush(); - - clearAndLoad(store); - assert store.metric.maxRowSize.getValue() != 0; - } - private void clearAndLoad(ColumnFamilyStore cfs) { cfs.clearUnsafe(); From 89d6b1652e83b6630064a3986170d099b616e7c4 Mon Sep 17 00:00:00 2001 From: Will Dey Date: Thu, 5 Sep 2024 01:22:31 -0400 Subject: [PATCH 7/9] metadata viewer --- .../cassandra/io/sstable/metadata/MetadataType.java | 1 + .../apache/cassandra/tools/SSTableMetadataViewer.java | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java index 5a1c09da1e..b2ec9c16fb 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataType.java @@ -28,6 +28,7 @@ public enum MetadataType COMPACTION(CompactionMetadata.serializer), /** Metadata always keep in memory */ STATS(StatsMetadata.serializer), + /** Meta-metadata about whether the ancestors metadata is valid **/ VALID_ANCESTORS(ValidAncestorsMetadata.serializer); public final IMetadataComponentSerializer serializer; diff --git a/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java b/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java index 831901439c..f75c4c76ff 100644 --- a/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java +++ b/src/java/org/apache/cassandra/tools/SSTableMetadataViewer.java @@ -54,6 +54,7 @@ public static void main(String[] args) throws IOException ValidationMetadata validation = (ValidationMetadata) metadata.get(MetadataType.VALIDATION); StatsMetadata stats = (StatsMetadata) metadata.get(MetadataType.STATS); CompactionMetadata compaction = (CompactionMetadata) metadata.get(MetadataType.COMPACTION); + ValidAncestorsMetadata validAncestors = (ValidAncestorsMetadata) metadata.get(MetadataType.VALID_ANCESTORS); out.printf("SSTable: %s%n", descriptor); if (validation != null) @@ -81,7 +82,15 @@ public static void main(String[] args) throws IOException } if (compaction != null) { - out.printf("Ancestors: %s%n", compaction.ancestors.toString()); + out.printf("Ancestors: %s", compaction.ancestors.toString()); + if (validAncestors != null) + { + out.println(" (considered valid)"); + } + else + { + out.println(" (considered invalid)"); + } out.printf("Estimated cardinality: %s%n", compaction.cardinalityEstimator.cardinality()); } From 806b7ae18c1e174fd6318dd781d2ba59da0140e9 Mon Sep 17 00:00:00 2001 From: svc-autorelease Date: Thu, 5 Sep 2024 15:20:33 +0000 Subject: [PATCH 8/9] Release 1.156.0-rc11 [skip ci] From 898502e34d90ad12485dcbe3673f05f492eed81e Mon Sep 17 00:00:00 2001 From: Will Dey Date: Fri, 6 Sep 2024 09:57:59 -0400 Subject: [PATCH 9/9] dry run ability --- .../org/apache/cassandra/db/ColumnFamilyStore.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java index 437c01b0db..fa26c934d1 100644 --- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java +++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java @@ -743,8 +743,15 @@ public static void removeUnfinishedCompactionLeftovers(CFMetaData metadata, Map< if (completedAncestors.contains(desc.generation)) { // if any of the ancestors were participating in a compaction, finish that compaction - logger.info("Going to delete leftover compaction ancestor {}", desc); - SSTable.delete(desc, sstableFiles.getValue()); + if (Boolean.getBoolean("palantir_cassandra.dry_run_ancestor_deletion")) + { + logger.info("Would have deleted leftover compaction ancestor {} if palantir_cassandra.dry_run_ancestor_deletion was false", desc); + } + else + { + logger.info("Going to delete leftover compaction ancestor {}", desc); + SSTable.delete(desc, sstableFiles.getValue()); + } UUID compactionTaskID = unfinishedCompactions.get(desc.generation); if (compactionTaskID != null) SystemKeyspace.finishCompaction(unfinishedCompactions.get(desc.generation));