Skip to content

Commit

Permalink
IMPALA-13638: Translate apostrophe to underscore in Prometheus metric…
Browse files Browse the repository at this point in the history
… names.

Impala has some metrics that reflect the state of the JVM. Some of these
metrics have names that are partly composed of the names of the
MemoryPoolMXBean objects in the Java virtual machine. In Jdk 8 these
are names like "Code Cache" and "PS Eden Space". In Jdk 11 these names
include apostrophe characters, for example "CodeHeap 'profiled
nmethods'". The derived metric names work OK for Impala in both the
webui and in json output. However the apostrophe character is illegal
in Prometheus metric names per
https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
and these metrics cannot be consumed by Prometheus. Fix this by adding
the apostrophe to the list of characters that are mapped to underscores
when we translate the metric names for Prometheus metrics.

TESTING:

Extended the test_prometheus_metrics test to parse all generated
Prometheus metrics. Ran the test with Jdk 11 where it failed without
the server fix

Change-Id: I557b123c075dff0b14ac527de08bc6177bd2a3f6
IMPALA-13596: first cut at tidied code
Reviewed-on: http://gerrit.cloudera.org:8080/22295
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
  • Loading branch information
bartash authored and Impala Public Jenkins committed Jan 6, 2025
1 parent 1907153 commit 21ef3e6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 11 deletions.
8 changes: 4 additions & 4 deletions be/src/util/metrics-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,12 @@ TEST_F(MetricsTest, PrometheusMetricNames) {
EXPECT_EQ("impala_server_metric",
MetricGroup::ImpalaToPrometheusName("impala-server-metric"));

// Test that . and - get transformed to _.
EXPECT_EQ("impala_metric_name_with_punctuation_",
MetricGroup::ImpalaToPrometheusName("metric-name.with-punctuation_"));
// Test that ".", "'" and "-" get transformed to _.
EXPECT_EQ("impala_metric__name__with_punctuation_",
MetricGroup::ImpalaToPrometheusName("metric-'name'.with-punctuation_"));

// Other special characters are unmodified
EXPECT_EQ("impala_!@#$%*", MetricGroup::ImpalaToPrometheusName("!@#$%*"));
EXPECT_EQ("impala_!@#$%*:", MetricGroup::ImpalaToPrometheusName("!@#$%*:"));
}

void AssertPrometheus(const std::stringstream& val, const string& name,
Expand Down
2 changes: 1 addition & 1 deletion be/src/util/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ string MetricGroup::ImpalaToPrometheusName(const string& impala_metric_name) {
// Substitute characters as needed to match prometheus conventions. The string is
// already the right size so we can do this in place.
for (size_t i = 0; i < result.size(); ++i) {
if (result[i] == '.' || result[i] == '-') result[i] = '_';
if (result[i] == '.' || result[i] == '-' || result[i] == '\'') result[i] = '_';
}
if (result.compare(0, 7, "impala_") != 0) {
result.insert(0, "impala_");
Expand Down
1 change: 1 addition & 0 deletions infra/python/deps/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ kerberos == 1.3.1
pexpect == 3.3
pg8000 == 1.10.2
prettytable == 0.7.2
prometheus-client == 0.12.0
psutil == 5.6.3
pyparsing == 2.0.3
pytest == 2.9.2
Expand Down
20 changes: 14 additions & 6 deletions tests/webserver/test_web_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from tests.util.parse_util import parse_duration_string_ms
from datetime import datetime
from multiprocessing import Process, Queue
from prometheus_client.parser import text_string_to_metric_families
from time import sleep, time
import itertools
import json
Expand Down Expand Up @@ -506,7 +507,7 @@ def __test_catalog_tablesfilesusage(self, db_name, tbl_name, numfiles):

def test_query_stmt(self):
"""Create a long select query then check if it is truncated in the response json."""
# The imput query is a select + 450 "x " long, which is long enough to get truncated.
# The input query is a select + 450 "x " long, which is long enough to get truncated.
query = "select \"{0}\"".format("x " * 450)
# The expected result query should be 253 long and contains the first 250
# chars + "..."
Expand All @@ -515,7 +516,7 @@ def test_query_stmt(self):
response_json = self.__run_query_and_get_debug_page(
query, self.QUERIES_URL, expected_state=self.client.QUERY_STATES["FINISHED"])
# Search the json for the expected value.
# The query can be in in_filght_queries even though it is in FINISHED state.
# The query can be in in_flight_queries even though it is in FINISHED state.
for json_part in itertools.chain(
response_json['completed_queries'], response_json['in_flight_queries']):
if expected_result in json_part['stmt']:
Expand Down Expand Up @@ -842,7 +843,7 @@ def test_download_profile(self):
# the query is in the file.
responses = self.get_and_check_status(
self.ROOT_URL + download_link, query, self.IMPALAD_TEST_PORT)
# Check the query id is in the content of the reponse.
# Check the query id is in the content of the response.
assert len(responses) == 1
assert query_id in responses[0].text
elif profile_format == 'Json':
Expand Down Expand Up @@ -870,6 +871,13 @@ def test_prometheus_metrics(self):
# check if metric shows up
assert 'impala_statestore_subscriber_heartbeat_interval_time_min' in resp[0].text

for response in resp:
# Parse Prometheus text format using prometheus_client library.
for _ in text_string_to_metric_families(response.text):
# We have to loop through the result to force the lazy parsing function
# to parse every line.
continue

def test_healthz_endpoint(self):
"""Test to check that the /healthz endpoint returns 200 OK."""
for port in self.TEST_PORTS_WITH_SS:
Expand All @@ -882,11 +890,11 @@ def test_knox_compatibility(self):
"""Checks that the template files conform to the requirements for compatibility with
the Apache Knox service definition."""
# Matches all 'a' links with an 'href' that doesn't start with either '#' (which stays
# on the same page an so doesn't need to the hostname) or '{{ __common__.host-url }}'
# on the same page and so doesn't need to the hostname) or '{{ __common__.host-url }}'
# Note that if we ever need to add a link that doesn't conform to this, we will
# probably also have to change the Knox service definition.
href_regex = "<(a|link) .*? href=['\"](?!({{ __common__.host-url }})|#)"
# Matches all 'script' tags that aren't absoluve urls.
# Matches all 'script' tags that aren't absolute urls.
script_regex = "<script .*?src=['\"](?!({{ __common__.host-url }})|http)"
# Matches all 'form' tags that are not followed by including the hidden inputs.
form_regex = "<form [^{]*?>(?!{{>www/form-hidden-inputs.tmpl}})"
Expand Down Expand Up @@ -1128,7 +1136,7 @@ def run(queue, client, query):

@pytest.mark.execute_serially
def test_hadoop_varz_page(self):
"""test for /hadoop-var to check availablity of haqoop configuration like
"""test for /hadoop-var to check availability of hadoop configuration like
hive warehouse dir, fs.defaultFS"""
responses = self.get_and_check_status(self.HADOOP_VARZ_URL,
"hive.metastore.warehouse.dir", ports_to_test=self.TEST_PORTS_WITHOUT_SS)
Expand Down

0 comments on commit 21ef3e6

Please sign in to comment.