first commit

This commit is contained in:
2024-10-28 23:04:48 +01:00
commit 1ee55157f1
911 changed files with 325331 additions and 0 deletions

View File

@@ -0,0 +1,191 @@
/*
© Copyright IBM Corporation 2018, 2019
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics contains code to provide metrics for the queue manager
package metrics
import (
"github.com/ibm-messaging/mq-container/pkg/logger"
"github.com/prometheus/client_golang/prometheus"
)
// Naming components used to build Prometheus metric names and label sets
const (
// namespace is set as the Namespace of every metric vector created in this file
namespace = "ibmmq"
// qmgrPrefix prefixes the name of metrics that are not object type metrics
qmgrPrefix = "qmgr"
// qmgrLabel is the label name used for the queue manager
qmgrLabel = "qmgr"
// objectPrefix prefixes the name of object type metrics
objectPrefix = "object"
// objectLabel is the additional label name used by object type metrics
objectLabel = "object"
)
// exporter holds the Prometheus metric vectors for a queue manager, along
// with the state used by its Describe and Collect methods
type exporter struct {
// qmName is passed as the queue manager label value when metrics are populated
qmName string
// gaugeMap holds the gauges allocated by Describe for non-delta metrics, by metric key
gaugeMap map[string]*prometheus.GaugeVec
// counterMap holds the counters allocated by Describe for delta metrics, by metric key
counterMap map[string]*prometheus.CounterVec
// firstCollect is true until the first call to Collect has completed
firstCollect bool
// log is used to report errors found while populating metrics
log *logger.Logger
}
// newExporter returns an exporter for the given queue manager, with empty
// gauge and counter maps and the first-collect flag set
func newExporter(qmName string, log *logger.Logger) *exporter {
	e := new(exporter)
	e.qmName = qmName
	e.log = log
	e.firstCollect = true
	e.gaugeMap = map[string]*prometheus.GaugeVec{}
	e.counterMap = map[string]*prometheus.CounterVec{}
	return e
}
// Describe provides details of all available metrics
//
// It sends a describe request (false) on the request channel, then allocates
// and describes one Prometheus vector for every metric in the response
func (e *exporter) Describe(ch chan<- *prometheus.Desc) {
	requestChannel <- false
	for key, metric := range <-responseChannel {
		if !metric.isDelta {
			// Non-delta type metrics are held in a Prometheus Gauge
			gauge := createGaugeVec(metric.name, metric.description, metric.objectType)
			e.gaugeMap[key] = gauge
			gauge.Describe(ch)
			continue
		}
		// Delta type metrics are held in a Prometheus Counter
		counter := createCounterVec(metric.name, metric.description, metric.objectType)
		e.counterMap[key] = counter
		counter.Describe(ch)
	}
}
// Collect is called at regular intervals to provide the current metric data
//
// It sends a collect request (true) on the request channel, then uses the
// response to populate and collect the Prometheus Counter (delta metrics) or
// Gauge (non-delta metrics) held for each metric key. No values are populated
// on the first call, to avoid a build-up of accumulated values.
func (e *exporter) Collect(ch chan<- prometheus.Metric) {
	requestChannel <- true
	response := <-responseChannel

	for key, metric := range response {
		if metric.isDelta {
			e.collectCounter(ch, key, metric)
		} else {
			e.collectGauge(ch, key, metric)
		}
	}

	e.firstCollect = false
}

// collectCounter adds the latest values of a delta type metric to its Prometheus Counter, then collects it
func (e *exporter) collectCounter(ch chan<- prometheus.Metric, key string, metric *metricData) {
	counterVec := e.counterMap[key]
	if counterVec == nil {
		// No counter was allocated for this key - calling methods on the nil vector would panic
		e.log.Errorf("Metrics Error: No counter allocated for metric key [%s]", key)
		return
	}

	// Skip on first collect to avoid build-up of accumulated values
	if !e.firstCollect {
		for label, value := range metric.values {
			if value < 0 {
				// A Prometheus Counter panics when asked to add a negative value
				e.log.Errorf("Metrics Error: Ignoring negative value %f for metric key [%s]", value, key)
				continue
			}
			counter, err := counterVec.GetMetricWithLabelValues(e.labelValues(label)...)
			if err != nil {
				e.log.Errorf("Metrics Error: %s", err.Error())
				continue
			}
			counter.Add(value)
		}
	}

	counterVec.Collect(ch)
}

// collectGauge resets the Prometheus Gauge of a non-delta type metric, sets its latest values, then collects it
func (e *exporter) collectGauge(ch chan<- prometheus.Metric, key string, metric *metricData) {
	gaugeVec := e.gaugeMap[key]
	if gaugeVec == nil {
		// No gauge was allocated for this key - calling methods on the nil vector would panic
		e.log.Errorf("Metrics Error: No gauge allocated for metric key [%s]", key)
		return
	}
	gaugeVec.Reset()

	// Skip on first collect to avoid build-up of accumulated values
	if !e.firstCollect {
		for label, value := range metric.values {
			gauge, err := gaugeVec.GetMetricWithLabelValues(e.labelValues(label)...)
			if err != nil {
				e.log.Errorf("Metrics Error: %s", err.Error())
				continue
			}
			gauge.Set(value)
		}
	}

	gaugeVec.Collect(ch)
}

// labelValues returns the label values for a metric value: only the queue manager name
// when the value is keyed by the queue manager label value, otherwise the value's label
// followed by the queue manager name
func (e *exporter) labelValues(label string) []string {
	if label == qmgrLabelValue {
		return []string{e.qmName}
	}
	return []string{label, e.qmName}
}
// createCounterVec returns a Prometheus CounterVec populated with metric details
//
// The prefix of the metric name and the label names depend on objectType (see getVecDetails)
func createCounterVec(name, description string, objectType bool) *prometheus.CounterVec {
	prefix, labels := getVecDetails(objectType)
	opts := prometheus.CounterOpts{
		Namespace: namespace,
		Name:      prefix + "_" + name,
		Help:      description,
	}
	return prometheus.NewCounterVec(opts, labels)
}
// createGaugeVec returns a Prometheus GaugeVec populated with metric details
//
// The prefix of the metric name and the label names depend on objectType (see getVecDetails)
func createGaugeVec(name, description string, objectType bool) *prometheus.GaugeVec {
	prefix, labels := getVecDetails(objectType)
	opts := prometheus.GaugeOpts{
		Namespace: namespace,
		Name:      prefix + "_" + name,
		Help:      description,
	}
	return prometheus.NewGaugeVec(opts, labels)
}
// getVecDetails returns the required prefix and labels for a metric
func getVecDetails(objectType bool) (prefix string, labels []string) {
prefix = qmgrPrefix
labels = []string{qmgrLabel}
if objectType {
prefix = objectPrefix
labels = []string{objectLabel, qmgrLabel}
}
return prefix, labels
}

View File

@@ -0,0 +1,204 @@
/*
© Copyright IBM Corporation 2018
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"testing"
"time"
"github.com/ibm-messaging/mq-golang/ibmmq"
"github.com/ibm-messaging/mq-golang/mqmetric"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
// TestDescribe_Counter runs the shared describe test with the test metric marked as a delta type
func TestDescribe_Counter(t *testing.T) {
testDescribe(t, true)
}
// TestDescribe_Gauge runs the shared describe test with the test metric left as a non-delta type
func TestDescribe_Gauge(t *testing.T) {
testDescribe(t, false)
}
// testDescribe drives exporter.Describe through the request/response channels
// and checks the description sent for the first test metric.
// When isDelta is true, the first test element is marked as a delta type before
// the metrics are initialised.
func testDescribe(t *testing.T, isDelta bool) {
teardownTestCase := setupTestCase(false)
defer teardownTestCase()
log := getTestLogger()
ch := make(chan *prometheus.Desc)
// Describe blocks on the unbuffered request channel, so run it in its own goroutine
go func() {
exporter := newExporter("qmName", log)
exporter.Describe(ch)
}()
// Describe is expected to send 'false' (describe rather than collect)
collect := <-requestChannel
if collect {
t.Errorf("Received unexpected collect request")
}
if isDelta {
mqmetric.Metrics.Classes[0].Types[0].Elements[0].Datatype = ibmmq.MQIAMO_MONITOR_DELTA
}
// Play the part of the metrics processor by replying with the initialised metrics
metrics, _ := initialiseMetrics(log)
responseChannel <- metrics
// Wait up to one second for the description to arrive
select {
case prometheusDesc := <-ch:
expected := "Desc{fqName: \"ibmmq_qmgr_" + testElement1Name + "\", help: \"" + testElement1Description + "\", constLabels: {}, variableLabels: [qmgr]}"
actual := prometheusDesc.String()
if actual != expected {
t.Errorf("Expected value=%s; actual %s", expected, actual)
}
case <-time.After(1 * time.Second):
t.Error("Did not receive channel response from describe")
}
}
// TestCollect_Counter runs the shared collect test with the test metric held in a Prometheus Counter
func TestCollect_Counter(t *testing.T) {
testCollect(t, true)
}
// TestCollect_Gauge runs the shared collect test with the test metric held in a Prometheus Gauge
func TestCollect_Gauge(t *testing.T) {
testCollect(t, false)
}
// testCollect drives exporter.Collect three times through the request/response
// channels and checks the value held by the test metric after each collect.
// When isDelta is true the metric is held in a Counter, otherwise in a Gauge.
func testCollect(t *testing.T, isDelta bool) {
teardownTestCase := setupTestCase(false)
defer teardownTestCase()
log := getTestLogger()
exporter := newExporter("qmName", log)
// Allocate the vector directly, as Describe is not called in this test
if isDelta {
exporter.counterMap[testKey1] = createCounterVec(testElement1Name, testElement1Description, false)
} else {
exporter.gaugeMap[testKey1] = createGaugeVec(testElement1Name, testElement1Description, false)
}
for i := 1; i <= 3; i++ {
ch := make(chan prometheus.Metric)
// Collect blocks on the unbuffered request channel, so run it in its own goroutine;
// closing ch unblocks the receive below when Collect sends no metric
go func() {
exporter.Collect(ch)
close(ch)
}()
// Collect is expected to send 'true' (collect rather than describe)
collect := <-requestChannel
if !collect {
t.Errorf("Received unexpected describe request")
}
// NOTE(review): presumably sets the test metric's published value to i - confirm against populateTestMetrics
populateTestMetrics(i, false)
if isDelta {
mqmetric.Metrics.Classes[0].Types[0].Elements[0].Datatype = ibmmq.MQIAMO_MONITOR_DELTA
}
// Play the part of the metrics processor by replying with updated metrics
metrics, _ := initialiseMetrics(log)
updateMetrics(metrics)
responseChannel <- metrics
select {
case <-ch:
var actual float64
prometheusMetric := dto.Metric{}
if isDelta {
exporter.counterMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric)
actual = prometheusMetric.GetCounter().GetValue()
} else {
exporter.gaugeMap[testKey1].WithLabelValues("qmName").Write(&prometheusMetric)
actual = prometheusMetric.GetGauge().GetValue()
}
if i == 1 {
// No values are populated on the first collect
if actual != float64(0) {
t.Errorf("Expected values to be zero on first collect; actual %f", actual)
}
} else if isDelta && i != 2 {
// A counter accumulates - the third collect adds to the value from the second
if actual != float64(i+(i-1)) {
t.Errorf("Expected value=%f; actual %f", float64(i+(i-1)), actual)
}
} else if actual != float64(i) {
t.Errorf("Expected value=%f; actual %f", float64(i), actual)
}
case <-time.After(1 * time.Second):
t.Error("Did not receive channel response from collect")
}
}
}
// TestCreateCounterVec checks the description of a counter created for a non-object type metric
func TestCreateCounterVec(t *testing.T) {
	const expected = "Desc{fqName: \"ibmmq_qmgr_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [qmgr]}"
	descriptions := make(chan *prometheus.Desc)
	// Describe blocks until the description is received, so it runs in its own goroutine
	go createCounterVec("MetricName", "MetricDescription", false).Describe(descriptions)
	if actual := (<-descriptions).String(); actual != expected {
		t.Errorf("Expected value=%s; actual %s", expected, actual)
	}
}
// TestCreateCounterVec_ObjectLabel checks the description of a counter created for an object type metric
func TestCreateCounterVec_ObjectLabel(t *testing.T) {
	const expected = "Desc{fqName: \"ibmmq_object_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [object qmgr]}"
	descriptions := make(chan *prometheus.Desc)
	// Describe blocks until the description is received, so it runs in its own goroutine
	go createCounterVec("MetricName", "MetricDescription", true).Describe(descriptions)
	if actual := (<-descriptions).String(); actual != expected {
		t.Errorf("Expected value=%s; actual %s", expected, actual)
	}
}
// TestCreateGaugeVec checks the description of a gauge created for a non-object type metric
func TestCreateGaugeVec(t *testing.T) {
	const expected = "Desc{fqName: \"ibmmq_qmgr_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [qmgr]}"
	descriptions := make(chan *prometheus.Desc)
	// Describe blocks until the description is received, so it runs in its own goroutine
	go createGaugeVec("MetricName", "MetricDescription", false).Describe(descriptions)
	if actual := (<-descriptions).String(); actual != expected {
		t.Errorf("Expected value=%s; actual %s", expected, actual)
	}
}
// TestCreateGaugeVec_ObjectLabel checks the description of a gauge created for an object type metric
func TestCreateGaugeVec_ObjectLabel(t *testing.T) {
	const expected = "Desc{fqName: \"ibmmq_object_MetricName\", help: \"MetricDescription\", constLabels: {}, variableLabels: [object qmgr]}"
	descriptions := make(chan *prometheus.Desc)
	// Describe blocks until the description is received, so it runs in its own goroutine
	go createGaugeVec("MetricName", "MetricDescription", true).Describe(descriptions)
	if actual := (<-descriptions).String(); actual != expected {
		t.Errorf("Expected value=%s; actual %s", expected, actual)
	}
}

124
internal/metrics/mapping.go Normal file
View File

@@ -0,0 +1,124 @@
/*
© Copyright IBM Corporation 2018
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics contains code to provide metrics for the queue manager
package metrics
// metricLookup is the value type of the metric names map
type metricLookup struct {
// name is the metric name mapped from the metric's description key
name string
// enabled is checked by initialiseMetrics; metrics that are not enabled are skipped
enabled bool
}
// generateMetricNamesMap generates metric names mapped from their description
//
// Each key has the form "<class>/<type>/<description>" and each value holds the
// metric name to use, together with a flag stating whether the metric is enabled
func generateMetricNamesMap() map[string]metricLookup {
	return map[string]metricLookup{
		"CPU/SystemSummary/CPU load - one minute average": {"cpu_load_one_minute_average_percentage", true},
		"CPU/SystemSummary/CPU load - five minute average": {"cpu_load_five_minute_average_percentage", true},
		"CPU/SystemSummary/CPU load - fifteen minute average": {"cpu_load_fifteen_minute_average_percentage", true},
		"CPU/SystemSummary/System CPU time percentage": {"system_cpu_time_percentage", true},
		"CPU/SystemSummary/User CPU time percentage": {"user_cpu_time_percentage", true},
		"CPU/SystemSummary/RAM free percentage": {"ram_free_percentage", true},
		"CPU/SystemSummary/RAM total bytes": {"system_ram_size_bytes", false},
		"CPU/QMgrSummary/System CPU time - percentage estimate for queue manager": {"system_cpu_time_estimate_for_queue_manager_percentage", true},
		"CPU/QMgrSummary/User CPU time - percentage estimate for queue manager": {"user_cpu_time_estimate_for_queue_manager_percentage", true},
		"CPU/QMgrSummary/RAM total bytes - estimate for queue manager": {"ram_usage_estimate_for_queue_manager_bytes", true},
		"DISK/SystemSummary/MQ trace file system - free space": {"trace_file_system_free_space_percentage", true},
		"DISK/SystemSummary/MQ trace file system - bytes in use": {"trace_file_system_in_use_bytes", true},
		"DISK/SystemSummary/MQ errors file system - free space": {"errors_file_system_free_space_percentage", true},
		"DISK/SystemSummary/MQ errors file system - bytes in use": {"errors_file_system_in_use_bytes", true},
		"DISK/SystemSummary/MQ FDC file count": {"fdc_files", true},
		"DISK/QMgrSummary/Queue Manager file system - free space": {"queue_manager_file_system_free_space_percentage", true},
		"DISK/QMgrSummary/Queue Manager file system - bytes in use": {"queue_manager_file_system_in_use_bytes", true},
		"DISK/Log/Log - logical bytes written": {"log_logical_written_bytes_total", true},
		"DISK/Log/Log - physical bytes written": {"log_physical_written_bytes_total", true},
		"DISK/Log/Log - current primary space in use": {"log_primary_space_in_use_percentage", true},
		"DISK/Log/Log - workload primary space utilization": {"log_workload_primary_space_utilization_percentage", true},
		"DISK/Log/Log - write latency": {"log_write_latency_seconds", true},
		"DISK/Log/Log - bytes max": {"log_max_bytes", true},
		"DISK/Log/Log - write size": {"log_write_size_bytes", true},
		"DISK/Log/Log - bytes in use": {"log_in_use_bytes", true},
		"DISK/Log/Log file system - bytes max": {"log_file_system_max_bytes", true},
		"DISK/Log/Log file system - bytes in use": {"log_file_system_in_use_bytes", true},
		"DISK/Log/Log - bytes occupied by reusable extents": {"log_occupied_by_reusable_extents_bytes", true},
		"DISK/Log/Log - bytes occupied by extents waiting to be archived": {"log_occupied_by_extents_waiting_to_be_archived_bytes", true},
		"DISK/Log/Log - bytes required for media recovery": {"log_required_for_media_recovery_bytes", true},
		"STATMQI/SUBSCRIBE/Create durable subscription count": {"durable_subscription_create_total", true},
		"STATMQI/SUBSCRIBE/Alter durable subscription count": {"durable_subscription_alter_total", true},
		"STATMQI/SUBSCRIBE/Resume durable subscription count": {"durable_subscription_resume_total", true},
		"STATMQI/SUBSCRIBE/Delete durable subscription count": {"durable_subscription_delete_total", true},
		"STATMQI/SUBSCRIBE/Create non-durable subscription count": {"non_durable_subscription_create_total", true},
		"STATMQI/SUBSCRIBE/Delete non-durable subscription count": {"non_durable_subscription_delete_total", true},
		"STATMQI/SUBSCRIBE/Failed create/alter/resume subscription count": {"failed_subscription_create_alter_resume_total", true},
		"STATMQI/SUBSCRIBE/Subscription delete failure count": {"failed_subscription_delete_total", true},
		"STATMQI/SUBSCRIBE/MQSUBRQ count": {"mqsubrq_total", true},
		"STATMQI/SUBSCRIBE/Failed MQSUBRQ count": {"failed_mqsubrq_total", true},
		"STATMQI/SUBSCRIBE/Durable subscriber - high water mark": {"durable_subscriber_high_water_mark", false},
		"STATMQI/SUBSCRIBE/Durable subscriber - low water mark": {"durable_subscriber_low_water_mark", false},
		"STATMQI/SUBSCRIBE/Non-durable subscriber - high water mark": {"non_durable_subscriber_high_water_mark", false},
		"STATMQI/SUBSCRIBE/Non-durable subscriber - low water mark": {"non_durable_subscriber_low_water_mark", false},
		"STATMQI/PUBLISH/Topic MQPUT/MQPUT1 interval total": {"topic_mqput_mqput1_total", true},
		"STATMQI/PUBLISH/Interval total topic bytes put": {"topic_put_bytes_total", true},
		"STATMQI/PUBLISH/Failed topic MQPUT/MQPUT1 count": {"failed_topic_mqput_mqput1_total", true},
		"STATMQI/PUBLISH/Persistent - topic MQPUT/MQPUT1 count": {"persistent_topic_mqput_mqput1_total", true},
		"STATMQI/PUBLISH/Non-persistent - topic MQPUT/MQPUT1 count": {"non_persistent_topic_mqput_mqput1_total", true},
		"STATMQI/PUBLISH/Published to subscribers - message count": {"published_to_subscribers_message_total", true},
		"STATMQI/PUBLISH/Published to subscribers - byte count": {"published_to_subscribers_bytes_total", true},
		"STATMQI/CONNDISC/MQCONN/MQCONNX count": {"mqconn_mqconnx_total", true},
		"STATMQI/CONNDISC/Failed MQCONN/MQCONNX count": {"failed_mqconn_mqconnx_total", true},
		"STATMQI/CONNDISC/MQDISC count": {"mqdisc_total", true},
		"STATMQI/CONNDISC/Concurrent connections - high water mark": {"concurrent_connections_high_water_mark", false},
		"STATMQI/OPENCLOSE/MQOPEN count": {"mqopen_total", true},
		"STATMQI/OPENCLOSE/Failed MQOPEN count": {"failed_mqopen_total", true},
		"STATMQI/OPENCLOSE/MQCLOSE count": {"mqclose_total", true},
		"STATMQI/OPENCLOSE/Failed MQCLOSE count": {"failed_mqclose_total", true},
		"STATMQI/INQSET/MQINQ count": {"mqinq_total", true},
		"STATMQI/INQSET/Failed MQINQ count": {"failed_mqinq_total", true},
		"STATMQI/INQSET/MQSET count": {"mqset_total", true},
		"STATMQI/INQSET/Failed MQSET count": {"failed_mqset_total", true},
		"STATMQI/PUT/Persistent message MQPUT count": {"persistent_message_mqput_total", true},
		"STATMQI/PUT/Persistent message MQPUT1 count": {"persistent_message_mqput1_total", true},
		"STATMQI/PUT/Put persistent messages - byte count": {"persistent_message_put_bytes_total", true},
		"STATMQI/PUT/Non-persistent message MQPUT count": {"non_persistent_message_mqput_total", true},
		"STATMQI/PUT/Non-persistent message MQPUT1 count": {"non_persistent_message_mqput1_total", true},
		"STATMQI/PUT/Put non-persistent messages - byte count": {"non_persistent_message_put_bytes_total", true},
		"STATMQI/PUT/Interval total MQPUT/MQPUT1 count": {"mqput_mqput1_total", true},
		"STATMQI/PUT/Interval total MQPUT/MQPUT1 byte count": {"mqput_mqput1_bytes_total", true},
		"STATMQI/PUT/Failed MQPUT count": {"failed_mqput_total", true},
		"STATMQI/PUT/Failed MQPUT1 count": {"failed_mqput1_total", true},
		"STATMQI/PUT/MQSTAT count": {"mqstat_total", true},
		"STATMQI/GET/Persistent message destructive get - count": {"persistent_message_destructive_get_total", true},
		"STATMQI/GET/Persistent message browse - count": {"persistent_message_browse_total", true},
		"STATMQI/GET/Got persistent messages - byte count": {"persistent_message_get_bytes_total", true},
		"STATMQI/GET/Persistent message browse - byte count": {"persistent_message_browse_bytes_total", true},
		"STATMQI/GET/Non-persistent message destructive get - count": {"non_persistent_message_destructive_get_total", true},
		"STATMQI/GET/Non-persistent message browse - count": {"non_persistent_message_browse_total", true},
		"STATMQI/GET/Got non-persistent messages - byte count": {"non_persistent_message_get_bytes_total", true},
		"STATMQI/GET/Non-persistent message browse - byte count": {"non_persistent_message_browse_bytes_total", true},
		"STATMQI/GET/Interval total destructive get- count": {"destructive_get_total", true},
		"STATMQI/GET/Interval total destructive get - byte count": {"destructive_get_bytes_total", true},
		"STATMQI/GET/Failed MQGET - count": {"failed_mqget_total", true},
		"STATMQI/GET/Failed browse count": {"failed_browse_total", true},
		"STATMQI/GET/MQCTL count": {"mqctl_total", true},
		"STATMQI/GET/Expired message count": {"expired_message_total", true},
		"STATMQI/GET/Purged queue count": {"purged_queue_total", true},
		"STATMQI/GET/MQCB count": {"mqcb_total", true},
		"STATMQI/GET/Failed MQCB count": {"failed_mqcb_total", true},
		"STATMQI/SYNCPOINT/Commit count": {"commit_total", true},
		"STATMQI/SYNCPOINT/Rollback count": {"rollback_total", true},
	}
}

View File

@@ -0,0 +1,37 @@
/*
© Copyright IBM Corporation 2018
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import "testing"
// TestGenerateMetricNamesMap checks the size of the metric names map and the
// name mapped for the first test key
func TestGenerateMetricNamesMap(t *testing.T) {
	const expectedSize = 93
	metricNamesMap := generateMetricNamesMap()
	if size := len(metricNamesMap); size != expectedSize {
		t.Errorf("Expected mapping-size=%d; actual %d", expectedSize, size)
	}
	actual, ok := metricNamesMap[testKey1]
	switch {
	case !ok:
		t.Errorf("No metric name mapping found for %s", testKey1)
	case actual.name != testElement1Name:
		t.Errorf("Expected metric name=%s; actual %s", testElement1Name, actual.name)
	}
}

123
internal/metrics/metrics.go Normal file
View File

@@ -0,0 +1,123 @@
/*
© Copyright IBM Corporation 2018, 2023
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics contains code to provide metrics for the queue manager
package metrics
import (
"context"
"fmt"
"net/http"
"time"
"github.com/ibm-messaging/mq-container/internal/ready"
"github.com/ibm-messaging/mq-container/pkg/logger"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
const (
// defaultPort is the port used to build the listen address of the metrics HTTP server
defaultPort = "9157"
)
var (
// metricsEnabled is set to true by GatherMetrics before metrics gathering is started;
// StopMetricsGathering does nothing while it is false
metricsEnabled = false
// metricsServer is the HTTP server which handles requests from Prometheus
// #nosec G112 - this code is changing soon to use https.
// for now we will ignore the gosec.
metricsServer = &http.Server{Addr: ":" + defaultPort}
)
// GatherMetrics gathers metrics for the queue manager
//
// It blocks until the queue manager is reported as active, then starts metrics
// gathering. If starting fails, the error is logged and gathering is stopped.
func GatherMetrics(qmName string, log *logger.Logger) {
	// If running in standby mode - poll until the queue manager becomes active
	status, _ := ready.Status(context.Background(), qmName)
	for !status.ActiveQM() {
		time.Sleep(requestTimeout * time.Second)
		status, _ = ready.Status(context.Background(), qmName)
	}

	metricsEnabled = true

	if err := startMetricsGathering(qmName, log); err != nil {
		log.Errorf("Metrics Error: %s", err.Error())
		StopMetricsGathering(log)
	}
}
// startMetricsGathering starts gathering metrics for the queue manager
//
// It starts the metrics processing goroutine, waits for it to signal on the
// start channel, registers the exporter with Prometheus and then serves the
// metrics endpoint from a separate goroutine.
//
// An error is returned if the exporter cannot be registered. A failure of the
// HTTP server is not returned; it is logged and metrics gathering is stopped.
func startMetricsGathering(qmName string, log *logger.Logger) error {
	// Log, rather than propagate, any panic raised on this goroutine during setup
	defer func() {
		if r := recover(); r != nil {
			log.Errorf("Metrics Error: %v", r)
		}
	}()

	log.Println("Starting metrics gathering")

	// Start processing metrics
	go processMetrics(log, qmName)

	// Wait for metrics to be ready before starting the Prometheus handler
	<-startChannel

	// Register metrics
	metricsExporter := newExporter(qmName, log)
	if err := prometheus.Register(metricsExporter); err != nil {
		// Wrap the cause so that callers can inspect it with errors.Is / errors.As
		return fmt.Errorf("Failed to register metrics: %w", err)
	}

	// Setup HTTP server to handle requests from Prometheus
	http.Handle("/metrics", promhttp.Handler())
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		// #nosec G104
		w.Write([]byte("Status: METRICS ACTIVE"))
	})

	go func() {
		// Keep the server error local to this goroutine rather than writing to
		// a variable shared with the enclosing function
		err := metricsServer.ListenAndServe()
		if err != nil && err != http.ErrServerClosed {
			log.Errorf("Metrics Error: Failed to handle metrics request: %v", err)
			StopMetricsGathering(log)
		}
	}()

	return nil
}
// StopMetricsGathering stops gathering metrics for the queue manager
//
// It does nothing unless metrics have been enabled. Otherwise it signals the
// metrics processor to stop and shuts down the HTTP server, allowing up to
// five seconds for the shutdown to complete.
func StopMetricsGathering(log *logger.Logger) {
	if !metricsEnabled {
		return
	}

	// Stop processing metrics
	stopChannel <- true

	// Shutdown HTTP server
	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := metricsServer.Shutdown(shutdownCtx); err != nil {
		log.Errorf("Failed to shutdown metrics server: %v", err)
	}
}

227
internal/metrics/update.go Normal file
View File

@@ -0,0 +1,227 @@
/*
© Copyright IBM Corporation 2018, 2019
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics contains code to provide metrics for the queue manager
package metrics
import (
"fmt"
"strings"
"time"
"github.com/ibm-messaging/mq-container/pkg/logger"
"github.com/ibm-messaging/mq-golang/ibmmq"
"github.com/ibm-messaging/mq-golang/mqmetric"
)
const (
// qmgrLabelValue is the label of a metric value which is reported with only the queue manager name as its label value
qmgrLabelValue = mqmetric.QMgrMapKey
// requestTimeout is a number of seconds, used when waiting for requests and between retries
requestTimeout = 10
)
var (
// startChannel is sent on by processMetrics after its first successful connection
startChannel = make(chan bool)
// stopChannel is sent on by StopMetricsGathering and received by processMetrics; it is buffered
stopChannel = make(chan bool, 2)
// requestChannel carries describe (false) and collect (true) requests from the exporter to processMetrics
requestChannel = make(chan bool)
// responseChannel carries the metrics map sent by processMetrics in reply to each request
responseChannel = make(chan map[string]*metricData)
)
// metricData holds the details and latest values of a single metric
type metricData struct {
// name is the metric name taken from the metric names map
name string
// description is the description of the metric element
description string
// objectType selects the object prefix and labels when the metric's Prometheus vector is created
objectType bool
// values holds the normalised metric values by label; it is replaced on every update
values map[string]float64
// isDelta is true when the metric element has the delta monitor datatype
isDelta bool
}
// processMetrics processes publications of metric data and handles describe/collect/stop requests
//
// It loops until a stop request is received: connecting to the queue manager,
// processing publications and serving requests until an error occurs, then
// closing the connection and retrying after the request timeout.
func processMetrics(log *logger.Logger, qmName string) {
var err error
var firstConnect = true
var metrics map[string]*metricData
for {
// Connect to queue manager and discover available metrics
err = doConnect(qmName)
if err == nil {
// Signal that metrics are ready - only after the first successful connection
if firstConnect {
firstConnect = false
startChannel <- true
}
// #nosec G104
metrics, _ = initialiseMetrics(log)
}
// Now loop until something goes wrong
for err == nil {
// Process publications of metric data
// TODO: If we have a large number of metrics to process, then we could be blocked from responding to stop requests
err = mqmetric.ProcessPublications()
// Handle describe/collect/stop requests
if err == nil {
select {
case collect := <-requestChannel:
// Only collect requests refresh the metric values; describe requests are sent the metrics as they are
if collect {
updateMetrics(metrics)
}
responseChannel <- metrics
case <-stopChannel:
log.Println("Stopping metrics gathering")
mqmetric.EndConnection()
return
case <-time.After(requestTimeout * time.Second):
log.Debugf("Metrics: No requests received within timeout period (%d seconds)", requestTimeout)
}
}
}
log.Errorf("Metrics Error: %s", err.Error())
// Close the connection
mqmetric.EndConnection()
// Handle stop requests - otherwise retry once the timeout period has passed
select {
case <-stopChannel:
log.Println("Stopping metrics gathering")
return
case <-time.After(requestTimeout * time.Second):
log.Println("Retrying metrics gathering")
}
}
}
// doConnect connects to the queue manager and discovers available metrics
//
// It returns an error, wrapping the underlying cause, if the connection
// cannot be made or if discovering and subscribing to the metrics fails.
// The connection is not closed here on failure.
func doConnect(qmName string) error {
	// Set connection configuration - client mode is off and no credentials are supplied
	var connConfig mqmetric.ConnectionConfig
	connConfig.ClientMode = false
	connConfig.UserId = ""
	connConfig.Password = ""

	// Connect to the queue manager - open the command and dynamic reply queues
	if err := mqmetric.InitConnectionStats(qmName, "SYSTEM.DEFAULT.MODEL.QUEUE", "", &connConfig); err != nil {
		return fmt.Errorf("Failed to connect to queue manager %s: %w", qmName, err)
	}

	// Discover available metrics for the queue manager and subscribe to them
	if err := mqmetric.DiscoverAndSubscribe("", true, ""); err != nil {
		return fmt.Errorf("Failed to discover and subscribe to metrics: %w", err)
	}

	return nil
}
// initialiseMetrics sets initial details for all available metrics
//
// Metric types whose object topic contains "%s" are ignored. Every other
// metric element is looked up in the metric names map by its key:
//   - elements with no mapping, or with a duplicate key, are logged as errors
//   - elements whose mapping is not enabled are skipped
//   - all other elements are added to the returned map
//
// If any error was logged, the metrics gathered so far are returned together
// with an error.
func initialiseMetrics(log *logger.Logger) (map[string]*metricData, error) {
	metrics := make(map[string]*metricData)
	metricNamesMap := generateMetricNamesMap()
	invalid := false

	for _, metricClass := range mqmetric.Metrics.Classes {
		for _, metricType := range metricClass.Types {
			if strings.Contains(metricType.ObjectTopic, "%s") {
				continue
			}
			for _, metricElement := range metricType.Elements {
				// Get unique metric key, and the metric name mapped from it
				key := makeKey(metricElement)
				lookup, found := metricNamesMap[key]

				switch {
				case !found:
					log.Errorf("Metrics Error: Skipping metric, unexpected key [%s]", key)
					invalid = true
				case !lookup.enabled:
					log.Debugf("Metrics: Skipping metric, metric is not enabled for key [%s]", key)
				default:
					if _, exists := metrics[key]; exists {
						log.Errorf("Metrics Error: Found duplicate metric key [%s]", key)
						invalid = true
					} else {
						metrics[key] = &metricData{
							name:        lookup.name,
							description: metricElement.Description,
							isDelta:     metricElement.Datatype == ibmmq.MQIAMO_MONITOR_DELTA,
						}
					}
				}
			}
		}
	}

	if invalid {
		return metrics, fmt.Errorf("Invalid metrics data")
	}
	return metrics, nil
}
// updateMetrics updates values for all available metrics
//
// For every metric element of a type whose object topic does not contain "%s",
// the values of the matching metric are replaced with the normalised cached
// publication values, and the element's cached values are then reset.
func updateMetrics(metrics map[string]*metricData) {
	for _, metricClass := range mqmetric.Metrics.Classes {
		for _, metricType := range metricClass.Types {
			if strings.Contains(metricType.ObjectTopic, "%s") {
				continue
			}
			for _, element := range metricType.Elements {
				// Elements with no entry in the metrics map were already reported by
				// 'initialiseMetrics', so they are ignored here without logging
				// - this function is called frequently
				if metric, known := metrics[makeKey(element)]; known {
					// Replace existing metric values with the cached values of publication data
					latest := make(map[string]float64)
					for label, raw := range element.Values {
						latest[label] = mqmetric.Normalise(element, label, raw)
					}
					metric.values = latest
				}

				// Reset cached values of publication data for this metric
				element.Values = make(map[string]int64)
			}
		}
	}
}
// makeKey builds a unique key for each metric
//
// The key has the form "<class name>/<type name>/<element description>"
func makeKey(metricElement *mqmetric.MonElement) string {
	metricType := metricElement.Parent
	metricClass := metricType.Parent
	return metricClass.Name + "/" + metricType.Name + "/" + metricElement.Description
}

View File

@@ -0,0 +1,197 @@
/*
© Copyright IBM Corporation 2018, 2019
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"os"
"testing"
"github.com/ibm-messaging/mq-container/pkg/logger"
"github.com/ibm-messaging/mq-golang/mqmetric"
)
// Names, descriptions and keys of the metric elements used by the tests
const (
testClassName = "CPU"
testTypeName = "SystemSummary"
testElement1Name = "cpu_load_five_minute_average_percentage"
testElement2Name = "cpu_load_fifteen_minute_average_percentage"
testElement1Description = "CPU load - five minute average"
testElement2Description = "CPU load - fifteen minute average"
// Keys have the form "<class name>/<type name>/<element description>"
testKey1 = testClassName + "/" + testTypeName + "/" + testElement1Description
testKey2 = testClassName + "/" + testTypeName + "/" + testElement2Description
)
// TestInitialiseMetrics checks that initialiseMetrics, run against the test
// fixture, returns a map holding exactly one metric: the one stored under
// testKey1, with the expected name and description, a false objectType and no
// values. The fixture element whose type has a "%s" object topic (testKey2)
// must not appear in the map.
func TestInitialiseMetrics(t *testing.T) {
	teardownTestCase := setupTestCase(false)
	defer teardownTestCase()

	metrics, err := initialiseMetrics(getTestLogger())
	// Check the error before looking at the result, and stop here on failure
	// so the remaining assertions do not run against an untrustworthy map
	if err != nil {
		t.Fatalf("Unexpected error %s", err.Error())
	}

	metric, ok := metrics[testKey1]
	if !ok {
		t.Error("Expected metric not found in map")
	} else {
		if metric.name != testElement1Name {
			t.Errorf("Expected name=%s; actual %s", testElement1Name, metric.name)
		}
		if metric.description != testElement1Description {
			t.Errorf("Expected description=%s; actual %s", testElement1Description, metric.description)
		}
		if metric.objectType {
			t.Errorf("Expected objectType=%v; actual %v", false, metric.objectType)
		}
		if len(metric.values) != 0 {
			t.Errorf("Expected values-size=%d; actual %d", 0, len(metric.values))
		}
	}

	if _, ok = metrics[testKey2]; ok {
		// "%%s" is rendered as a literal "%s" in the failure message
		t.Errorf("Unexpected metric found in map, %%s object topics should be ignored")
	}
	if len(metrics) != 1 {
		t.Errorf("Map contains unexpected metrics, map size=%d", len(metrics))
	}
}
// TestInitialiseMetrics_UnexpectedKey changes the description of the first
// fixture element to "New Metric" and expects initialiseMetrics to return an
// error as a result
func TestInitialiseMetrics_UnexpectedKey(t *testing.T) {
	cleanup := setupTestCase(false)
	defer cleanup()

	// Presumably the new description yields a key that initialiseMetrics
	// does not recognise - confirm against initialiseMetrics
	element := mqmetric.Metrics.Classes[0].Types[0].Elements[0]
	element.Description = "New Metric"

	if _, err := initialiseMetrics(getTestLogger()); err == nil {
		t.Error("Expected skipping metric error")
	}
}
// TestInitialiseMetrics_DuplicateKeys builds the fixture with the same element
// registered twice under one type and expects initialiseMetrics to return an
// error
func TestInitialiseMetrics_DuplicateKeys(t *testing.T) {
	cleanup := setupTestCase(true)
	defer cleanup()

	if _, err := initialiseMetrics(getTestLogger()); err == nil {
		t.Error("Expected duplicate keys error")
	}
}
// TestUpdateMetrics checks the two effects of updateMetrics on the fixture
// metric: the first call copies the cached publication value (1) into the
// metric under the queue manager label and empties the cache; a second call,
// made with the cache now empty, leaves the metric with no values.
func TestUpdateMetrics(t *testing.T) {
	teardownTestCase := setupTestCase(false)
	defer teardownTestCase()

	metrics, err := initialiseMetrics(getTestLogger())
	if err != nil {
		t.Fatalf("Unexpected error %s", err.Error())
	}
	updateMetrics(metrics)

	// Stop if the metric is missing, rather than dereferencing whatever zero
	// value the map lookup produced
	metric, ok := metrics[testKey1]
	if !ok {
		t.Fatal("Expected metric not found in map")
	}

	actual, ok := metric.values[qmgrLabelValue]
	if !ok {
		t.Error("No metric values found for queue manager label")
	} else {
		if actual != float64(1) {
			t.Errorf("Expected metric value=%f; actual %f", float64(1), actual)
		}
		if len(metric.values) != 1 {
			t.Errorf("Expected values-size=%d; actual %d", 1, len(metric.values))
		}
	}

	if len(mqmetric.Metrics.Classes[0].Types[0].Elements[0].Values) != 0 {
		t.Error("Unexpected cached value; publication data should have been reset")
	}

	// With no cached publication data left, a further update clears the metric
	updateMetrics(metrics)
	if len(metric.values) != 0 {
		t.Error("Unexpected metric value; data should have been cleared")
	}
}
// TestMakeKey checks that makeKey, applied to the first fixture element,
// produces testKey1
func TestMakeKey(t *testing.T) {
	cleanup := setupTestCase(false)
	defer cleanup()

	element := mqmetric.Metrics.Classes[0].Types[0].Elements[0]
	if got := makeKey(element); got != testKey1 {
		t.Errorf("Expected value=%s; actual %s", testKey1, got)
	}
}
// setupTestCase populates the test metrics with a cached value of 1,
// optionally registering a duplicate element, and returns the function the
// caller should run to clean the test metrics up again
func setupTestCase(duplicateKey bool) func() {
	const testValue = 1
	populateTestMetrics(testValue, duplicateKey)
	return cleanTestMetrics
}
// populateTestMetrics replaces mqmetric.Metrics.Classes with a single class
// containing two types of one element each. The first element carries
// testValue as its cached value under the queue manager label; the second
// element has no cached values and belongs to a type whose object topic is
// "%s". When duplicateKey is set, the first element is registered a second
// time under the first type.
func populateTestMetrics(testValue int, duplicateKey bool) {
	metricClass := &mqmetric.MonClass{Name: testClassName}

	// First type and its element, holding the cached test value
	metricType1 := &mqmetric.MonType{
		Name:        testTypeName,
		ObjectTopic: "ObjectTopic",
		Parent:      metricClass,
	}
	metricElement1 := &mqmetric.MonElement{
		MetricName:  "Element1Name",
		Description: testElement1Description,
		Values:      map[string]int64{qmgrLabelValue: int64(testValue)},
		Parent:      metricType1,
	}
	metricType1.Elements = map[int]*mqmetric.MonElement{0: metricElement1}
	if duplicateKey {
		// Same element under a second index, so its key occurs twice
		metricType1.Elements[1] = metricElement1
	}

	// Second type uses a "%s" object topic; its element has no cached values
	metricType2 := &mqmetric.MonType{
		Name:        testTypeName,
		ObjectTopic: "%s",
		Parent:      metricClass,
	}
	metricElement2 := &mqmetric.MonElement{
		MetricName:  "Element2Name",
		Description: testElement2Description,
		Values:      map[string]int64{},
		Parent:      metricType2,
	}
	metricType2.Elements = map[int]*mqmetric.MonElement{0: metricElement2}

	metricClass.Types = map[int]*mqmetric.MonType{
		0: metricType1,
		1: metricType2,
	}
	mqmetric.Metrics.Classes = map[int]*mqmetric.MonClass{0: metricClass}
}
// cleanTestMetrics discards the fixture by replacing mqmetric.Metrics.Classes
// with a new, empty map
func cleanTestMetrics() {
	mqmetric.Metrics.Classes = map[int]*mqmetric.MonClass{}
}
// getTestLogger creates a logger named "test" that writes to standard output,
// for passing to the functions under test
func getTestLogger() *logger.Logger {
	// The error result is deliberately discarded; callers receive whatever
	// logger value NewLogger returned
	testLog, _ := logger.NewLogger(os.Stdout, false, false, "test")
	return testLog
}