From 7551ce3812c26a37753d9ba25b84a592ad53d228 Mon Sep 17 00:00:00 2001 From: Stephen Marshall Date: Thu, 7 Jun 2018 10:25:16 +0100 Subject: [PATCH] Map metric names based on description --- internal/metrics/exporter_test.go | 6 +- internal/metrics/update.go | 156 +++++++++++++++++++++++++++--- internal/metrics/update_test.go | 69 ++++++++++--- 3 files changed, 200 insertions(+), 31 deletions(-) diff --git a/internal/metrics/exporter_test.go b/internal/metrics/exporter_test.go index e24054c..e8db353 100644 --- a/internal/metrics/exporter_test.go +++ b/internal/metrics/exporter_test.go @@ -45,7 +45,7 @@ func TestDescribe(t *testing.T) { select { case prometheusDesc := <-ch: - expected := "Desc{fqName: \"ibmmq_qmgr_Element1Name\", help: \"Element1Description\", constLabels: {}, variableLabels: [qmgr]}" + expected := "Desc{fqName: \"ibmmq_qmgr_" + testElement1Name + "\", help: \"" + testElement1Description + "\", constLabels: {}, variableLabels: [qmgr]}" actual := prometheusDesc.String() if actual != expected { t.Errorf("Expected value=%s; actual %s", expected, actual) @@ -62,7 +62,7 @@ func TestCollect(t *testing.T) { log := getTestLogger() exporter := newExporter("qmName", log) - exporter.gaugeMap["ClassName/Type1Name/Element1Name"] = createGaugeVec("Element1Name", "Element1Description", false) + exporter.gaugeMap[testKey1] = createGaugeVec(testElement1Name, testElement1Description, false) for i := 1; i <= 3; i++ { @@ -85,7 +85,7 @@ func TestCollect(t *testing.T) { select { case <-ch: prometheusMetric := dto.Metric{} - exporter.gaugeMap["ClassName/Type1Name/Element1Name"].WithLabelValues("qmName").Write(&prometheusMetric) + exporter.gaugeMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric) actual := prometheusMetric.GetGauge().GetValue() if i == 1 { diff --git a/internal/metrics/update.go b/internal/metrics/update.go index 121c5cd..2b12050 100644 --- a/internal/metrics/update.go +++ b/internal/metrics/update.go @@ -132,20 +132,34 @@ func initialiseMetrics(log *logger.Logger) (map[string]*metricData, error) { metrics := make(map[string]*metricData) validMetrics := true + metricNamesMap := generateMetricNamesMap() for _, metricClass := range mqmetric.Metrics.Classes { for _, metricType := range metricClass.Types { if !strings.Contains(metricType.ObjectTopic, "%s") { for _, metricElement := range metricType.Elements { - metric := metricData{ - name: metricElement.MetricName, - description: metricElement.Description, - } + + // Get unique metric key key := makeKey(metricElement) - if _, exists := metrics[key]; !exists { - metrics[key] = &metric + + // Get metric name from mapping + if metricName, found := metricNamesMap[key]; found { + + // Set metric details + metric := metricData{ + name: metricName, + description: metricElement.Description, + } + + // Add metric + if _, exists := metrics[key]; !exists { + metrics[key] = &metric + } else { + log.Errorf("Metrics Error: Found duplicate metric key %s", key) + validMetrics = false + } } else { - log.Errorf("Metrics Error: Found duplicate metric key %s", key) + log.Errorf("Metrics Error: Skipping metric, unexpected key %s", key) validMetrics = false } } @@ -154,7 +168,7 @@ func initialiseMetrics(log *logger.Logger) (map[string]*metricData, error) { } if !validMetrics { - return metrics, fmt.Errorf("Invalid metrics data - found duplicate metric keys") + return metrics, fmt.Errorf("Invalid metrics data") } return metrics, nil } @@ -168,13 +182,15 @@ func updateMetrics(metrics map[string]*metricData) { for _, metricElement := range metricType.Elements { // Clear existing metric values - metric := metrics[makeKey(metricElement)] - metric.values = make(map[string]float64) + metric, ok := metrics[makeKey(metricElement)] + if ok { + metric.values = make(map[string]float64) - // Update metric with cached values of publication data - for label, value := range metricElement.Values { - normalisedValue := mqmetric.Normalise(metricElement, label, value) - metric.values[label] = normalisedValue + // Update metric with cached values of publication data + for label, value := range metricElement.Values { + normalisedValue := mqmetric.Normalise(metricElement, label, value) + metric.values[label] = normalisedValue + } } // Reset cached values of publication data for this metric @@ -187,5 +203,115 @@ func updateMetrics(metrics map[string]*metricData) { // makeKey builds a unique key for each metric func makeKey(metricElement *mqmetric.MonElement) string { - return metricElement.Parent.Parent.Name + "/" + metricElement.Parent.Name + "/" + metricElement.MetricName + return metricElement.Parent.Parent.Name + "/" + metricElement.Parent.Name + "/" + metricElement.Description +} + +// generateMetricNamesMap generates metric names mapped from their description +func generateMetricNamesMap() map[string]string { + + metricNamesMap := make(map[string]string) + + var mappings = []struct { + key string + value string + }{ + {"CPU/SystemSummary/CPU load - five minute average", "cpu_load_five_minute_average_percentage"}, + {"CPU/SystemSummary/CPU load - fifteen minute average", "cpu_load_fifteen_minute_average_percentage"}, + {"CPU/SystemSummary/RAM free percentage", "ram_free_percentage"}, + {"CPU/SystemSummary/RAM total bytes", "ram_total_bytes"}, + {"CPU/SystemSummary/User CPU time percentage", "user_cpu_time_percentage"}, + {"CPU/SystemSummary/System CPU time percentage", "system_cpu_time_percentage"}, + {"CPU/SystemSummary/CPU load - one minute average", "cpu_load_one_minute_average_percentage"}, + {"CPU/QMgrSummary/System CPU time - percentage estimate for queue manager", "system_cpu_time_estimate_for_queue_manager_percentage"}, + {"CPU/QMgrSummary/RAM total bytes - estimate for queue manager", "ram_total_estimate_for_queue_manager_bytes"}, + {"CPU/QMgrSummary/User CPU time - percentage estimate for queue manager", "user_cpu_time_estimate_for_queue_manager_percentage"}, + {"DISK/SystemSummary/MQ trace file system - bytes in use", "mq_trace_file_system_in_use_bytes"}, + {"DISK/SystemSummary/MQ trace file system - free space", "mq_trace_file_system_free_space_percentage"}, + {"DISK/SystemSummary/MQ errors file system - bytes in use", "mq_errors_file_system_in_use_bytes"}, + {"DISK/SystemSummary/MQ errors file system - free space", "mq_errors_file_system_free_space_percentage"}, + {"DISK/SystemSummary/MQ FDC file count", "mq_fdc_file_count"}, + {"DISK/QMgrSummary/Queue Manager file system - bytes in use", "queue_manager_file_system_in_use_bytes"}, + {"DISK/QMgrSummary/Queue Manager file system - free space", "queue_manager_file_system_free_space_percentage"}, + {"DISK/Log/Log - bytes occupied by reusable extents", "log_occupied_by_reusable_extents_bytes"}, + {"DISK/Log/Log - write size", "log_write_size_bytes"}, + {"DISK/Log/Log - bytes in use", "log_in_use_bytes"}, + {"DISK/Log/Log - logical bytes written", "log_logical_written_bytes"}, + {"DISK/Log/Log - write latency", "log_write_latency_seconds"}, + {"DISK/Log/Log - bytes required for media recovery", "log_required_for_media_recovery_bytes"}, + {"DISK/Log/Log - current primary space in use", "log_current_primary_space_in_use_percentage"}, + {"DISK/Log/Log - workload primary space utilization", "log_workload_primary_space_utilization_percentage"}, + {"DISK/Log/Log - bytes occupied by extents waiting to be archived", "log_occupied_by_extents_waiting_to_be_archived_bytes"}, + {"DISK/Log/Log - bytes max", "log_max_bytes"}, + {"DISK/Log/Log file system - bytes in use", "log_file_system_in_use_bytes"}, + {"DISK/Log/Log file system - bytes max", "log_file_system_max_bytes"}, + {"DISK/Log/Log - physical bytes written", "log_physical_written_bytes"}, + {"STATMQI/SUBSCRIBE/Create durable subscription count", "create_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Resume durable subscription count", "resume_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Create non-durable subscription count", "create_non_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count", "failed_create_alter_resume_subscription_count"}, + {"STATMQI/SUBSCRIBE/Subscription delete failure count", "subscription_delete_failure_count"}, + {"STATMQI/SUBSCRIBE/MQSUBRQ count", "mqsubrq_count"}, + {"STATMQI/SUBSCRIBE/Failed MQSUBRQ count", "failed_mqsubrq_count"}, + {"STATMQI/SUBSCRIBE/Durable subscriber - high water mark", "durable_subscriber_high_water_mark_count"}, + {"STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark", "non_durable_subscriber_high_water_mark_count"}, + {"STATMQI/SUBSCRIBE/Durable subscriber - low water mark", "durable_subscriber_low_water_mark_count"}, + {"STATMQI/SUBSCRIBE/Delete non-durable subscription count", "delete_non_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Alter durable subscription count", "alter_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Delete durable subscription count", "delete_durable_subscription_count"}, + {"STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark", "non_durable_subscriber_low_water_mark_count"}, + {"STATMQI/PUBLISH/Interval total topic bytes put", "interval_total_topic_put_bytes"}, + {"STATMQI/PUBLISH/Published to subscribers - message count", "published_to_subscribers_message_count"}, + {"STATMQI/PUBLISH/Published to subscribers - byte count", "published_to_subscribers_bytes"}, + {"STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count", "non_persistent_topic_mqput_mqput1_count"}, + {"STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count", "persistent_topic_mqput_mqput1_count"}, + {"STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count", "failed_topic_mqput_mqput1_count"}, + {"STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total", "topic_mqput_mqput1_interval_count"}, + {"STATMQI/CONNDISC/MQCONN/MQCONNX count", "mqconn_mqconnx_count"}, + {"STATMQI/CONNDISC/Failed MQCONN/MQCONNX count", "failed_mqconn_mqconnx_count"}, + {"STATMQI/CONNDISC/Concurrent connections - high water mark", "concurrent_connections_high_water_mark_count"}, + {"STATMQI/CONNDISC/MQDISC count", "mqdisc_count"}, + {"STATMQI/OPENCLOSE/MQOPEN count", "mqopen_count"}, + {"STATMQI/OPENCLOSE/Failed MQOPEN count", "failed_mqopen_count"}, + {"STATMQI/OPENCLOSE/MQCLOSE count", "mqclose_count"}, + {"STATMQI/OPENCLOSE/Failed MQCLOSE count", "failed_mqclose_count"}, + {"STATMQI/INQSET/MQINQ count", "mqinq_count"}, + {"STATMQI/INQSET/Failed MQINQ count", "failed_mqinq_count"}, + {"STATMQI/INQSET/MQSET count", "mqset_count"}, + {"STATMQI/INQSET/Failed MQSET count", "failed_mqset_count"}, + {"STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count", "interval_total_mqput_mqput1_bytes"}, + {"STATMQI/PUT/Persistent message MQPUT count", "persistent_message_mqput_count"}, + {"STATMQI/PUT/Failed MQPUT count", "failed_mqput_count"}, + {"STATMQI/PUT/Non-persistent message MQPUT1 count", "non_persistent_message_mqput1_count"}, + {"STATMQI/PUT/Persistent message MQPUT1 count", "persistent_message_mqput1_count"}, + {"STATMQI/PUT/Failed MQPUT1 count", "failed_mqput1_count"}, + {"STATMQI/PUT/Put non-persistent messages - byte count", "put_non_persistent_messages_bytes"}, + {"STATMQI/PUT/Interval total MQPUT/MQPUT1 count", "interval_total_mqput_mqput1_count"}, + {"STATMQI/PUT/Put persistent messages - byte count", "put_persistent_messages_bytes"}, + {"STATMQI/PUT/MQSTAT count", "mqstat_count"}, + {"STATMQI/PUT/Non-persistent message MQPUT count", "non_persistent_message_mqput_count"}, + {"STATMQI/GET/Interval total destructive get- count", "interval_total_destructive_get_count"}, + {"STATMQI/GET/MQCTL count", "mqctl_count"}, + {"STATMQI/GET/Failed MQGET - count", "failed_mqget_count"}, + {"STATMQI/GET/Got non-persistent messages - byte count", "got_non_persistent_messages_bytes"}, + {"STATMQI/GET/Persistent message browse - count", "persistent_message_browse_count"}, + {"STATMQI/GET/Expired message count", "expired_message_count"}, + {"STATMQI/GET/Purged queue count", "purged_queue_count"}, + {"STATMQI/GET/Interval total destructive get - byte count", "interval_total_destructive_get_bytes"}, + {"STATMQI/GET/Non-persistent message destructive get - count", "non_persistent_message_destructive_get_count"}, + {"STATMQI/GET/Got persistent messages - byte count", "got_persistent_messages_bytes"}, + {"STATMQI/GET/Non-persistent message browse - count", "non_persistent_message_browse_count"}, + {"STATMQI/GET/Failed browse count", "failed_browse_count"}, + {"STATMQI/GET/Persistent message destructive get - count", "persistent_message_destructive_get_count"}, + {"STATMQI/GET/Non-persistent message browse - byte count", "non_persistent_message_browse_bytes"}, + {"STATMQI/GET/Persistent message browse - byte count", "persistent_message_browse_bytes"}, + {"STATMQI/GET/MQCB count", "mqcb_count"}, + {"STATMQI/GET/Failed MQCB count", "failed_mqcb_count"}, + {"STATMQI/SYNCPOINT/Commit count", "commit_count"}, + {"STATMQI/SYNCPOINT/Rollback count", "rollback_count"}, + } + + for _, mapping := range mappings { + metricNamesMap[mapping.key] = mapping.value + } + return metricNamesMap } diff --git a/internal/metrics/update_test.go b/internal/metrics/update_test.go index af38905..f539338 100644 --- a/internal/metrics/update_test.go +++ b/internal/metrics/update_test.go @@ -23,13 +23,24 @@ import ( "github.com/ibm-messaging/mq-golang/mqmetric" ) +const ( + testClassName = "CPU" + testTypeName = "SystemSummary" + testElement1Name = "cpu_load_five_minute_average_percentage" + testElement2Name = "cpu_load_fifteen_minute_average_percentage" + testElement1Description = "CPU load - five minute average" + testElement2Description = "CPU load - fifteen minute average" + testKey1 = testClassName + "/" + testTypeName + "/" + testElement1Description + testKey2 = testClassName + "/" + testTypeName + "/" + testElement2Description +) + func TestInitialiseMetrics(t *testing.T) { teardownTestCase := setupTestCase(false) defer teardownTestCase() metrics, err := initialiseMetrics(getTestLogger()) - metric, ok := metrics["ClassName/Type1Name/Element1Name"] + metric, ok := metrics[testKey1] if err != nil { t.Errorf("Unexpected error %s", err.Error()) @@ -37,11 +48,11 @@ func TestInitialiseMetrics(t *testing.T) { if !ok { t.Error("Expected metric not found in map") } else { - if metric.name != "Element1Name" { - t.Errorf("Expected name=%s; actual %s", "Element1Name", metric.name) + if metric.name != testElement1Name { + t.Errorf("Expected name=%s; actual %s", testElement1Name, metric.name) } - if metric.description != "Element1Description" { - t.Errorf("Expected description=%s; actual %s", "Element1Description", metric.description) + if metric.description != testElement1Description { + t.Errorf("Expected description=%s; actual %s", testElement1Description, metric.description) } if metric.objectType != false { t.Errorf("Expected objectType=%v; actual %v", false, metric.objectType) @@ -50,7 +61,7 @@ func TestInitialiseMetrics(t *testing.T) { t.Errorf("Expected values-size=%d; actual %d", 0, len(metric.values)) } } - _, ok = metrics["ClassName/Type2Name/Element2Name"] + _, ok = metrics[testKey2] if ok { t.Errorf("Unexpected metric found in map, %%s object topics should be ignored") } @@ -60,6 +71,19 @@ func TestInitialiseMetrics(t *testing.T) { } } +func TestInitialiseMetrics_UnexpectedKey(t *testing.T) { + + teardownTestCase := setupTestCase(false) + defer teardownTestCase() + + mqmetric.Metrics.Classes[0].Types[0].Elements[0].Description = "New Metric" + _, err := initialiseMetrics(getTestLogger()) + + if err == nil { + t.Error("Expected skipping metric error") + } +} + func TestInitialiseMetrics_DuplicateKeys(t *testing.T) { teardownTestCase := setupTestCase(true) @@ -80,7 +104,7 @@ func TestUpdateMetrics(t *testing.T) { metrics, _ := initialiseMetrics(getTestLogger()) updateMetrics(metrics) - metric, _ := metrics["ClassName/Type1Name/Element1Name"] + metric, _ := metrics[testKey1] actual, ok := metric.values[qmgrLabelValue] if !ok { @@ -110,13 +134,32 @@ func TestMakeKey(t *testing.T) { teardownTestCase := setupTestCase(false) defer teardownTestCase() - expected := "ClassName/Type1Name/Element1Name" + expected := testKey1 actual := makeKey(mqmetric.Metrics.Classes[0].Types[0].Elements[0]) if actual != expected { t.Errorf("Expected value=%s; actual %s", expected, actual) } } +func TestGenerateMetricNamesMap(t *testing.T) { + + metricNamesMap := generateMetricNamesMap() + + if len(metricNamesMap) != 93 { + t.Errorf("Expected mapping-size=%d; actual %d", 93, len(metricNamesMap)) + } + + actual, ok := metricNamesMap[testKey1] + + if !ok { + t.Errorf("No metric name mapping found for %s", testKey1) + } else { + if actual != testElement1Name { + t.Errorf("Expected metric name=%s; actual %s", testElement1Name, actual) + } + } +} + func setupTestCase(duplicateKey bool) func() { populateTestMetrics(1, duplicateKey) return func() { @@ -132,15 +175,15 @@ func populateTestMetrics(testValue int, duplicateKey bool) { metricElement1 := new(mqmetric.MonElement) metricElement2 := new(mqmetric.MonElement) - metricClass.Name = "ClassName" - metricType1.Name = "Type1Name" - metricType2.Name = "Type2Name" + metricClass.Name = testClassName + metricType1.Name = testTypeName + metricType2.Name = testTypeName metricElement1.MetricName = "Element1Name" - metricElement1.Description = "Element1Description" + metricElement1.Description = testElement1Description metricElement1.Values = make(map[string]int64) metricElement1.Values[qmgrLabelValue] = int64(testValue) metricElement2.MetricName = "Element2Name" - metricElement2.Description = "Element2Description" + metricElement2.Description = testElement2Description metricElement2.Values = make(map[string]int64) metricType1.ObjectTopic = "ObjectTopic" metricType2.ObjectTopic = "%s"