Jira and Confluence Performance Monitoring Part Two


Continuing from Jira and Confluence Performance Monitoring Part One, in this article I will explain in a bit more depth how the Bot works, which I think is the most interesting part.

First of all, you need a Bot that is hooked up to your Slack. I wrote my own Bot based on Hubot (Node.js, Express framework). When I ask ‘confluence status’, the Bot checks Confluence’s real-time status via JMX. And when I ask ‘confluence response-time’ or ‘confluence stuck-thread’, the Bot runs my predefined query against Splunk via its API and then sends the result back to Slack.

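To run a JMX query, I just let the Bot call a simple bash script. Here is the sample code. (Note: the listing directly below is the Hubot script for the Splunk queries, followed by the bash script and its jmxterm command file; the Hubot script that calls the bash script is the last listing in this article.)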

'use strict';
var splunkjs = require('splunk-sdk');
module.exports = function (robot) {
var service = new splunkjs.Service({
username: process.env.SPLUNK_USERNAME,
password: process.env.SPLUNK_PASSWORD,
scheme:"https",
host: process.env.SPLUNK_HOST,
port: process.env.SPLUNK_PORT
});
function splunk_login (msg) {
service.login(function(err, success) {
if (err) {
console.log(err);
msg.send("`Oops, something went wrong!`");
}
console.log("Login was successful: " + success);
return success
});
}
// Search query
var confluenceStuck = 'your search query here | parse here';
var confluenceResponseTime = 'your search query here | parse here | stats avg(response_time)';
// Set the search parameters
var searchParams = {
exec_mode: "normal",
earliest_time: "-5m"
};
// Run search
function splunk_search (searchQuery, msg) {
service.search(searchQuery, searchParams, function(err, job) {
// Display the job's search ID
console.log("Job SID: ", job.sid);
// Poll the status of the search job
job.track({period: 200}, {
done: function(job) {
console.log("Done!");
// Get the results and print them
job.results({}, function(err, results, job) {
//console.log(job.state());
//console.log(results);
var fields = results.fields;
if (fields.length == 0) {
msg.send("“`Nothing“`");
};
var rows = results.rows;
//console.log(rows);
for(var i = 0; i < rows.length; i++) {
var values = rows[i];
console.log("Row " + i + ": " + values);
for(var j = 0; j < values.length; j++) {
var field = fields[j];
var value = values[j];
console.log("field " + j + ": " + field)
if (field === '_raw') {
console.log(value.split('\n')[0]);
msg.send("“`" + value.split('\n')[0] + "“`");
}
if (field === 'avg(response_time)') {
console.log(value);
msg.send("“`" + value + "“`");
}
}
}
});
},
failed: function(job) {
console.log("Job failed")
msg.send("`Search job failed!`");
},
error: function(err) {
done(err);
msg.send("`Oops, something went wrong!`")
}
});
});
};
robot.respond(/confluence (stuck-thread|stuck threads)/i, function (msg) {
msg.send("What are the stuck threads in Confluence?");
if (splunk_login) {
splunk_search(confluenceStuck, msg);
}
else {
msg.send("`Login failed!`");
}
});
robot.respond(/confluence (response time|response-time)/i, function (msg) {
msg.send("What is Confluence average response time in the past 5 minutes?");
if (splunk_login) {
splunk_search(confluenceResponseTime, msg);
}
else {
msg.send("`Login failed!`");
}
});
robot.hear(/Confluence stuck thread is detected/ig, function (msg) {
msg.send("What are the stuck threads in Confluence?");
if (splunk_login) {
splunk_search(confluenceStuck, msg);
}
else {
msg.send("`Login failed!`");
}
});
}
view raw splunk_query.js hosted with ❤ by GitHub
#!/bin/bash
# confluence_status.sh
#
# Runs the jmxterm commands listed in the file "confluence_busy_threads"
# against the JMX endpoint in $JMX_CONFLUENCE (credentials in $JMX_USER /
# $JMX_PASSWORD), writes jmxterm's output to "confluence_tmp", and then prints
# that file with empty lines removed.
#
# jmxterm's own console output (stdout and stderr) is discarded so that only
# the attribute values reach the caller. sed runs only if jmxterm succeeded.
java -jar jmxterm-1.0.0-uber.jar -l "$JMX_CONFLUENCE" -u "$JMX_USER" -p "$JMX_PASSWORD" -i confluence_busy_threads -o confluence_tmp > /dev/null 2>&1 && sed -e '/^$/d' confluence_tmp
# confluence_busy_threads file
# jmxterm command file; confluence_status.sh passes it to jmxterm with "-i confluence_busy_threads".
# Each "get -b <bean> <attribute>..." line reads the listed attributes from the named MBean.
# Thread pool of the "http-nio-8443" connector.
get -b Standalone:name="http-nio-8443",type=ThreadPool currentThreadsBusy currentThreadCount maxThreads connectionCount
# Confluence system information bean.
get -b Confluence:name=SystemInformation DatabaseExampleLatency
# Confluence request metrics bean.
get -b Confluence:name=RequestMetrics NumberOfRequestsInLastTenSeconds AverageExecutionTimeForLastTenRequests
# JVM operating-system bean: CPU load, load average and free physical memory.
get -b java.lang:type=OperatingSystem ProcessCpuLoad SystemCpuLoad SystemLoadAverage FreePhysicalMemorySize
# JVM memory bean: heap usage.
get -b java.lang:type=Memory HeapMemoryUsage

In the sample code you can see that the Bot also runs the JMX query when it hears the message ‘Confluence long response time is detected’, because I configured Splunk to send that message to the Slack channel whenever the response time is higher than a threshold. Isn’t that easy? 🙂

OK, next I will show you how to trigger a Splunk query via the API. Here is the sample code.

/**
* Check confluence jmx status
*/
'use strict';
const exec = require('child_process').exec;
module.exports = function (robot) {
function app_status (app, msg) {
var child = exec('cd ' + __dirname + '; ./' + app + '_status.sh', function (error, stdout, stderr) {
if (error) {
console.log(error);
msg.send("`oops, something went wrong!`");
}
process.stdout.write(stdout);
process.stderr.write(stderr);
msg.send("“`https://"+app+"\n"+stdout+"“`");
});
}
robot.respond(/confluence status/i, function (msg) {
msg.send("Let me check Confluence for you, please wait…");
app_status('confluence', msg);
});
robot.hear(/Confluence long response time is detected/ig, function (msg) {
msg.send("Let me check how Confluence is doing, please wait…");
app_status('confluence', msg);
});
}

You can add more useful commands to the Bot. Be creative!

Leave a comment