From 18163a8a171ed3827949144d874bc00a3ba55170 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Fri, 2 Sep 2022 17:46:25 +0800 Subject: [PATCH] storage: correct the write amplification (#5740) close pingcap/tiflash#5738 --- dbms/src/Common/ProfileEvents.cpp | 1 - dbms/src/Common/TiFlashMetrics.h | 1 - .../DeltaMerge/Delta/ColumnFileFlushTask.cpp | 15 ---- .../Storages/DeltaMerge/Delta/MemTableSet.cpp | 9 --- .../SSTFilesToDTFilesOutputStream.cpp | 8 -- metrics/grafana/tiflash_summary.json | 79 +++++++++++-------- 6 files changed, 44 insertions(+), 69 deletions(-) diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index 7507ff0b1f8..6bce34af2b9 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -67,7 +67,6 @@ M(PSMVCCCompactOnDeltaRebaseRejected) \ M(PSMVCCCompactOnBase) \ \ - M(DMWriteBytes) \ M(DMWriteBlock) \ M(DMWriteBlockNS) \ M(DMWriteFile) \ diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 0c969fe24f5..6e4774494e5 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -103,7 +103,6 @@ namespace DB F(type_raft_wait_index_duration, {{"type", "tmt_raft_wait_index_duration"}}, ExpBuckets{0.001, 2, 20})) \ M(tiflash_syncing_data_freshness, "The freshness of tiflash data with tikv data", Histogram, \ F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.001, 2, 20})) \ - M(tiflash_storage_write_amplification, "The data write amplification in storage engine", Gauge) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ M(tiflash_storage_command_count, "Total number of storage's command, such as delete range / shutdown /startup", Counter, \ F(type_delete_range, {"type", "delete_range"}), F(type_ingest, {"type", "ingest"})) \ diff --git a/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp b/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp index 373efa10445..71a504dc6e0 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp @@ -21,14 +21,6 @@ #include #include -namespace ProfileEvents -{ -extern const Event DMWriteBytes; -extern const Event PSMWriteBytes; -extern const Event WriteBufferFromFileDescriptorWriteBytes; -extern const Event WriteBufferAIOWriteBytes; -} // namespace ProfileEvents - namespace DB { namespace DM @@ -104,13 +96,6 @@ bool ColumnFileFlushTask::commit(ColumnFilePersistedSetPtr & persisted_file_set, mem_table_set->removeColumnFilesInFlushTask(*this); - // Also update the write amplification - auto total_write = ProfileEvents::counters[ProfileEvents::DMWriteBytes].load(std::memory_order_relaxed); - auto actual_write = ProfileEvents::counters[ProfileEvents::PSMWriteBytes].load(std::memory_order_relaxed) - + ProfileEvents::counters[ProfileEvents::WriteBufferFromFileDescriptorWriteBytes].load(std::memory_order_relaxed) - + ProfileEvents::counters[ProfileEvents::WriteBufferAIOWriteBytes].load(std::memory_order_relaxed); - GET_METRIC(tiflash_storage_write_amplification) - .Set((static_cast(actual_write) / 1024 / 1024) / (static_cast(total_write) / 1024 / 1024)); return true; } } // namespace DM diff --git a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp index e94912b8a9d..eb1a0af55d8 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/MemTableSet.cpp @@ -22,11 +22,6 @@ #include #include -namespace ProfileEvents -{ -extern const Event DMWriteBytes; -} - namespace DB { namespace DM @@ -264,7 +259,6 @@ void MemTableSet::removeColumnFilesInFlushTask(const ColumnFileFlushTask & flush if (unlikely(tasks.size() > column_files.size())) throw Exception("column_files num check failed", ErrorCodes::LOGICAL_ERROR); - size_t flush_bytes = 0; auto column_file_iter = column_files.begin(); for (const auto & task : tasks) { @@ -272,7 +266,6 @@ void MemTableSet::removeColumnFilesInFlushTask(const ColumnFileFlushTask & flush { throw Exception("column_files check failed", ErrorCodes::LOGICAL_ERROR); } - flush_bytes += task.column_file->getBytes(); column_file_iter++; } ColumnFiles new_column_files; @@ -292,8 +285,6 @@ void MemTableSet::removeColumnFilesInFlushTask(const ColumnFileFlushTask & flush rows = new_rows; bytes = new_bytes; deletes = new_deletes; - - ProfileEvents::increment(ProfileEvents::DMWriteBytes, flush_bytes); } diff --git a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp index 3ee268152ad..216bf56b8f5 100644 --- a/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp +++ b/dbms/src/Storages/DeltaMerge/SSTFilesToDTFilesOutputStream.cpp @@ -29,11 +29,6 @@ #include #include -namespace ProfileEvents -{ -extern const Event DMWriteBytes; -} - namespace DB { namespace ErrorCodes @@ -84,9 +79,6 @@ void SSTFilesToDTFilesOutputStream::writeSuffix() const auto bytes_written = dt_file->getBytesOnDisk(); storage->getStore()->preIngestFile(dt_file->parentPath(), dt_file->fileId(), bytes_written); - // Report DMWriteBytes for calculating write amplification - ProfileEvents::increment(ProfileEvents::DMWriteBytes, bytes_written); - dt_stream.reset(); } diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 3e2afad435a..27008448fbf 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -2,7 +2,7 @@ "__inputs": [ { "name": "DS_TEST-CLUSTER", - "label": "test-cluster", + "label": "Test-Cluster", "description": "", "type": "datasource", "pluginId": "prometheus", @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1654217728945, + "iteration": 1661869798239, "links": [], "panels": [ { @@ -545,9 +545,9 @@ "seriesOverrides": [ { "alias": "/limit/", + "color": "#C4162A", "fill": 0, - "nullPointMode": "null", - "color": "#C4162A" + "nullPointMode": "null" } ], "spaceLength": 10, @@ -642,11 +642,11 @@ "refId": "K" }, { + "exemplar": true, "expr": "sum(tiflash_system_current_metric_MemoryCapacity{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, "legendFormat": "limit-{{instance}}", - "exemplar": true, - "refId": "L", - "hide": false + "refId": "L" } ], "thresholds": [], @@ -745,9 +745,9 @@ }, { "alias": "/limit/", + "color": "#C4162A", "fill": 0, - "nullPointMode": "null", - "color": "#C4162A" + "nullPointMode": "null" } ], "spaceLength": 10, @@ -764,11 +764,11 @@ "step": 40 }, { + "exemplar": true, "expr": "sum(tiflash_system_current_metric_LogicalCPUCores{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "intervalFactor": 1, "legendFormat": "limit-{{instance}}", - "exemplar": true, - "refId": "B", - "intervalFactor": 1 + "refId": "B" } ], "thresholds": [], @@ -3673,7 +3673,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, @@ -3684,11 +3684,8 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/5min-write/", - "yaxis": 2 - }, - { - "alias": "/5min-all/", + "$$hashKey": "object:169", + "alias": "/fs|write/", "yaxis": 2 } ], @@ -3697,55 +3694,63 @@ "steppedLine": false, "targets": [ { - "expr": "max(tiflash_storage_write_amplification{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "exemplar": true, + "expr": "sum by (instance) (\ntiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"} +\ntiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"} +\ntiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}\n)\n/\nsum by (instance) (\ntiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}\n)", "format": "time_series", "hide": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "total-{{instance}}", + "legendFormat": "amp-total-{{instance}}", "refId": "A" }, { "exemplar": true, - "expr": "sum((rate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])) / (rate(tiflash_system_profile_event_DMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]))) by (instance)", + "expr": "sum by (instance) (\nrate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) +\nrate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) +\nrate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])\n)\n/\nsum by (instance) (\nrate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[5m])\n)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "5min-{{instance}}", + "legendFormat": "amp-5min-{{instance}}", "refId": "B" }, { "exemplar": true, - "expr": "sum((rate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m]) + rate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m]) + rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m])) / (rate(tiflash_system_profile_event_DMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m]))) by (instance)", + "expr": "sum by (instance) (\nrate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m]) +\nrate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m]) +\nrate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[10m])\n)\n/\nsum by (instance) (\nrate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[10m])\n)", "format": "time_series", "hide": true, "interval": "", "intervalFactor": 1, - "legendFormat": "10min-{{instance}}", + "legendFormat": "amp-10min-{{instance}}", "refId": "C" }, { - "expr": "sum((rate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m]) + rate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m]) + rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m])) / (rate(tiflash_system_profile_event_DMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m]))) by (instance)", + "exemplar": true, + "expr": "sum by (instance) (\nrate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m]) +\nrate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m]) +\nrate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30m])\n)\n/\nsum by (instance) (\nrate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[30m])\n)", "format": "time_series", "hide": true, + "interval": "", "intervalFactor": 1, - "legendFormat": "30min-{{instance}}", + "legendFormat": "amp-30min-{{instance}}", "refId": "D" }, { - "expr": "sum((rate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) + rate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]))) by (instance)", + "exemplar": true, + "expr": "sum by (instance) (\nrate(tiflash_system_profile_event_PSMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) +\nrate(tiflash_system_profile_event_WriteBufferFromFileDescriptorWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m]) +\nrate(tiflash_system_profile_event_WriteBufferAIOWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])\n)", "format": "time_series", "hide": true, + "interval": "", "intervalFactor": 1, - "legendFormat": "5min-all-{{instance}}", + "legendFormat": "fs-5min-{{instance}}", "refId": "E" }, { - "expr": "sum(rate(tiflash_system_profile_event_DMWriteBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[5m])) by (instance)", + "exemplar": true, + "expr": "sum by (instance) (\nrate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[5m])\n)", "format": "time_series", "hide": true, + "interval": "", "intervalFactor": 1, - "legendFormat": "5min-write-{{instance}}", + "legendFormat": "write-5min-{{instance}}", "refId": "F" } ], @@ -3759,6 +3764,7 @@ "sort": 0, "value_type": "individual" }, + "transformations": [], "type": "graph", "xaxis": { "buckets": null, @@ -3769,15 +3775,17 @@ }, "yaxes": [ { + "$$hashKey": "object:225", "decimals": null, "format": "short", "label": null, "logBase": 1, - "max": null, + "max": "20", "min": "0", "show": true }, { + "$$hashKey": "object:226", "format": "binBps", "label": null, "logBase": 1, @@ -6273,7 +6281,7 @@ "h": 9, "w": 24, "x": 0, - "y": 71 + "y": 7 }, "height": "", "hiddenSeries": false, @@ -6309,6 +6317,7 @@ "repeatedByRow": true, "seriesOverrides": [ { + "$$hashKey": "object:83", "alias": "/total/", "yaxis": 2 } @@ -6410,7 +6419,7 @@ "h": 8, "w": 24, "x": 0, - "y": 80 + "y": 16 }, "hiddenSeries": false, "id": 62, @@ -6522,7 +6531,7 @@ "h": 9, "w": 24, "x": 0, - "y": 88 + "y": 24 }, "height": "", "hiddenSeries": false, @@ -6644,7 +6653,7 @@ "h": 9, "w": 24, "x": 0, - "y": 97 + "y": 33 }, "hiddenSeries": false, "id": 90,