Skip to content
Evan Carmi edited this page Apr 28, 2011 · 1 revision

We use munin to monitor roomtrol devices and some specific servers.

Note: this munin setup sometimes behaves unexpectedly and not exactly as we wish, possibly because it is improperly configured. Take all settings here with a grain of salt.

Install munin-node on the client device you want to monitor:

sudo apt-get install munin-node

Then make it accessible to the munin server by adding the server's IP address to the allowed list in /etc/munin/munin-node.conf:

allow ^129\.133\.124\.145$

Now add the client to the munin server's page by editing /etc/munin/munin.conf. A sample is below:

# Example configuration file for Munin, generated by 'make build'

# The next four variables specify the location of the RRD
# databases, the HTML output, logs and the lock/pid files.  They all
# must be writable by the user running munin-cron.  They are all
# defaulted to the values you see here.
#
# dbdir	/var/lib/munin
# htmldir /var/cache/munin/www
# logdir /var/log/munin
# rundir  /var/run/munin
#
# Where to look for the HTML templates
# tmpldir	/etc/munin/templates

# (Exactly one) directory to include all files from.
#
includedir /etc/munin/munin-conf.d

# Make graphs show values per minute instead of per second
#graph_period minute

# Graphics files are normally generated by munin-graph, no matter if
# the graphs are used or not.  You can change this to
# on-demand-graphing by following the instructions in
# http://munin.projects.linpro.no/wiki/CgiHowto
#
#graph_strategy cgi

# munin-cgi-graph is invoked by the web server up to very many times at the
# same time.  This is not optimal since it results in high CPU and memory
# consumption to the degree that the system can thrash.  Again the default is
# 6.  Most likely the optimal number for max_cgi_graph_jobs is the same as
# max_graph_jobs.
#
#munin_cgi_graph_jobs 6

# If the automatic CGI url is wrong for your system override it here:
#
#cgiurl_graph /cgi-bin/munin-cgi-graph

# munin-graph runs in parallel, the number of concurrent processes is
# 6.  If you want munin-graph to not be parallel set to 0.  If set too
# high it will slow down munin-graph.  Some experiments are needed to
# determine how many are optimal on your system.  On a multi-core
# system with good SCSI disks the number can probably be quite high.
# 
#max_graph_jobs 6

# Drop someuser@example.com and anotheruser@example.com an email every time
# something changes (OK -> WARNING, CRITICAL -> OK, etc)
#contact.someuser.command mail -s "Munin notification" someuser@example.com
#contact.anotheruser.command mail -s "Munin notification" anotheruser@example.com
#
# For those with Nagios, the following might come in handy. In addition,
# the services must be defined in the Nagios server as well.
#contact.nagios.command /usr/bin/send_nsca nagios.host.comm -c /etc/nsca.conf

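# Define contacts
# NOTE: the recipient addresses in the contact lines below appear as the
# redacted placeholder "[email protected]" on this page; replace each one
# with the real email address before using this configuration.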
contact.micah.command mail -s "Roomtrol Munin Notification" [email protected]
contact.rob.command mail -s "Roomtrol Munin Notification" [email protected]
contact.evan.command mail -s "Roomtrol Munin Notification" [email protected]

# a simple host tree
[imsvm.production]
    address 129.133.124.145
    use_node_name yes

[sc509a.production]
    address 129.133.127.4
    use_node_name yes
    contacts micah rob evan
    cpu.warning 100
    load.warning 1
    load.critical 2

[allb004.production]
    address 129.133.127.194
    use_node_name yes
    contacts micah rob evan
    cpu.warning 100
    load.warning 1
    load.critical 2

[production;Production]
    update no

    load_graph.update no
    load_graph.graph_category System
    load_graph.graph_args --base 1000 -l 0 --units-exponent 3
    load_graph.graph_title Load Values
    load_graph.loadsum.label Load value
    load_graph.loadsum.stack allb004=allb004.production:load.load sc509a=sc509a.production:load.load

    cpu_graph.update no
    cpu_graph.graph_category System
    cpu_graph.graph_args --base 1000 -l 0 --units-exponent 1
    cpu_graph.graph_title CPU Usage
    cpu_graph.loadsum.label CPU Usage
    cpu_graph.loadsum.stack allb004=allb004.production:cpu.user sc509a=sc509a.production:cpu.user

    uptime_graph.update no
    uptime_graph.graph_category System
    uptime_graph.graph_args --base 1000 -l 0 --units-exponent 1
    uptime_graph.graph_title Uptimes
    uptime_graph.loadsum.label Uptime
    uptime_graph.loadsum.stack allb004=allb004.production:uptime.uptime sc509a=sc509a.production:uptime.uptime