#!/usr/bin/perl
# -*- perl -*-

# Copyright (C) 2015-2018 Pirx Developers - https://pirx.dev/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

=head1 NAME

hq_snmp_host_ - Munin wildcard plugin to monitor host CPU, memory, swap
and storage via SNMP.

=head1 APPLICABLE SYSTEMS

Linux system with access to remote host through SNMP.

=head1 CONFIGURATION

This plugin requires special configuration to work properly. For each
SNMP host you must configure the name that munin-node will report for it,
its hostname or IP address, and its SNMP community name:

  [hq_snmp_host_*]
    env.host1_name host1.domain.com
    env.host1_address 192.168.100.1
    env.host1_community public
    env.host2_name host2.domain.com
    env.host2_address 192.168.100.2
    env.host2_community something

Now symlink hq_snmp_host_ as hq_snmp_host_host1 and as hq_snmp_host_host2
in your munin plugins directory. That's all.

Additionally, you may configure warning and critical levels for some graphs.
Add the following to your munin-node config if necessary:

  [hq_snmp_host_*]
    env.cpu_warning 80
    env.cpu_critical 90
    env.disk_warning 90
    env.disk_critical 95
    env.memory_warning 95
    env.memory_critical 99
    env.swap_warning 50
    env.swap_critical 80

=head1 VERSION

  20181011

=head1 MAGIC MARKERS

  #%# family=manual
  #%# capabilities=autoconf

=head1 BUGS

None.

=head1 AUTHOR

Pirx Developers - https://pirx.dev/

=head1 LICENSE

Apache License, Version 2.0 (see the header at the top of this file).

=cut

use strict;
use warnings;
use Net::SNMP qw(:ALL);
use Munin::Plugin;
use Encode qw(encode decode);

# Forward declaration; the walker itself is defined at the end of this file.
sub walk_oids;

# Handle autoconf: the plugin is declared "family=manual" and needs per-host
# settings, so it never offers itself for automatic installation.
if(defined($ARGV[0]) and $ARGV[0] eq 'autoconf') {
  print("no\n");
  exit(0);
}

# Munin::Plugin helper; NOTE(review): presumably stops the plugin when the
# master does not support multigraph -- confirm against Munin::Plugin docs.
need_multigraph();

# The part of the script name after "hq_snmp_host_" selects which
# env.<name>_* settings are read below. Both a full path and a bare file
# name in $0 are accepted; an empty suffix leaves $graph_name undefined.
my ($graph_name) = $0 =~ m{(?:\A|/)hq_snmp_host_([^/]+)\z};

if(!defined($graph_name) or $graph_name eq "") {
  print("Error: invalid symlink name.\n");
  exit(1);
}

# Warning and critical thresholds, expressed as percentages. Every value has
# a built-in default that an environment variable of the same name overrides.
my $cpu_warning     = defined($ENV{'cpu_warning'})     ? $ENV{'cpu_warning'}     : 80;
my $cpu_critical    = defined($ENV{'cpu_critical'})    ? $ENV{'cpu_critical'}    : 90;
my $disk_warning    = defined($ENV{'disk_warning'})    ? $ENV{'disk_warning'}    : 90;
my $disk_critical   = defined($ENV{'disk_critical'})   ? $ENV{'disk_critical'}   : 95;
my $memory_warning  = defined($ENV{'memory_warning'})  ? $ENV{'memory_warning'}  : 95;
my $memory_critical = defined($ENV{'memory_critical'}) ? $ENV{'memory_critical'} : 99;
my $swap_warning    = defined($ENV{'swap_warning'})    ? $ENV{'swap_warning'}    : 50;
my $swap_critical   = defined($ENV{'swap_critical'})   ? $ENV{'swap_critical'}   : 80;

# Per-host settings are read from the environment variables
# <graph_name>_name, <graph_name>_address and <graph_name>_community.
# A variable that is not set leaves the corresponding value undefined.
my $host_name      = $ENV{$graph_name . '_name'};
my $host_address   = $ENV{$graph_name . '_address'};
my $host_community = $ENV{$graph_name . '_community'};

# All three settings are mandatory and must be non-empty.
foreach my $setting ($host_name, $host_address, $host_community) {
  if(defined($setting) and $setting ne "") {
    next;
  }
  print("Error: incomplete configuration.\n");
  exit(1);
}

# Create the SNMP session for the configured host. No -version is passed,
# so Net::SNMP's default protocol version is used.
my ($session, $error) = Net::SNMP->session(
  -hostname  => $host_address,
  -community => $host_community,
  -timeout   => 30,
  -retries   => 3,
);
if(!defined($session)) {
  # The STDOUT message is unchanged; the reason reported by Net::SNMP is
  # additionally written to STDERR so the failure can be diagnosed.
  print("SNMP connection error. Exiting.\n");
  if(defined($error) and $error ne "") {
    print STDERR "hq_snmp_host_" . $graph_name . ": " . $error . "\n";
  }
  exit(1);
}

# Scratch hash that receives the result of each SNMP walk below.
my %snmp_data;

# Operating system family of the remote host, derived from the value at
# 1.3.6.1.2.1.1.1.0 (sysDescr). It defaults to "unknown" so the string
# comparisons later in the script never operate on an undefined value.
my $system = "unknown";

%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.1.1")};
if(defined($snmp_data{'1.3.6.1.2.1.1.1.0'})) {
  my $sys_descr = $snmp_data{'1.3.6.1.2.1.1.1.0'};
  # Checked in order; the first matching pattern decides the system.
  my @system_patterns = (
    [qr/Software: Windows .*Version/, "windows"],
    [qr/^Linux/,                      "linux"],
    [qr/^VMware ESXi/,                "esxi"],
    [qr/^RouterOS/,                   "routeros"],
  );
  foreach my $pattern (@system_patterns) {
    if($sys_descr =~ $pattern->[0]) {
      $system = $pattern->[1];
      last;
    }
  }
}

# System uptime in whole days, or "U" (unknown) when it cannot be read.
# NOTE(review): with Net::SNMP's default translation a TimeTicks value is
# expected to arrive as text such as "3 days, 02:10:05.00",
# "5 hours, 12:30.00", "12 minutes, 30.00" or "42.00 seconds" -- confirm
# against the Net::SNMP documentation. Only the "N day(s)" form carries a
# day count; the other forms mean the host has been up for less than a day.
my $uptime = "U";
%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.25.1.1")};
if(defined($snmp_data{'1.3.6.1.2.1.25.1.1.0'})) {
  my $raw_uptime = $snmp_data{'1.3.6.1.2.1.25.1.1.0'};
  if($raw_uptime =~ /^(\d+) days?\b/) {
    $uptime = $1;
  }
  elsif($raw_uptime =~ /\b(?:hours?|minutes?|seconds?)\b/) {
    # Less than one day: do not report the hour/minute count as days.
    $uptime = 0;
  }
  elsif($raw_uptime =~ /^(\d+)/) {
    # Unrecognised format: keep the historical "leading number" behaviour.
    $uptime = $1;
  }
}

# Number of running processes, or "U" when the value is missing or is not
# a plain non-negative integer.
my $processes = "U";
%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.25.1.6")};
if(defined($snmp_data{'1.3.6.1.2.1.25.1.6.0'}) and $snmp_data{'1.3.6.1.2.1.25.1.6.0'} =~ /^(\d+)$/) {
  $processes = $1;
}

# Per-core CPU load, keyed by a zero-based core number obtained by
# subtracting the smallest table index from each core's own index.
my %cpus;
%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.25.3.3.1.2")};
my $cpu_min_id = -1;
my %cpu_load_by_index;
foreach my $oid (keys %snmp_data) {
  my ($cpu_id) = $oid =~ m/^1\.3\.6\.1\.2\.1\.25\.3\.3\.1\.2\.(\d+)$/;
  if(!defined($cpu_id)) {
    # Not an entry of the expected column; ignore it.
    next;
  }
  $cpu_load_by_index{$cpu_id} = $snmp_data{$oid};
  # The minimum is determined numerically. A string sort of the OIDs would
  # place index 10 before index 2 and produce negative core numbers.
  if($cpu_min_id == -1 or $cpu_id < $cpu_min_id) {
    $cpu_min_id = $cpu_id;
  }
}
foreach my $cpu_id (keys %cpu_load_by_index) {
  $cpus{$cpu_id - $cpu_min_id} = $cpu_load_by_index{$cpu_id};
}

# Textual names for the numeric storage type suffixes (the last component
# of an OID below 1.3.6.1.2.1.25.2.1).
my %device_types = (
  '1' => 'other',
  '2' => 'physical_memory',
  '3' => 'virtual_memory',
  '4' => 'disk',
  '5' => 'removable_disk',
  '6' => 'floppy',
  '7' => 'optical_drive',
  '8' => 'ramdisk',
  '9' => 'flash_memory',
  '10' => 'network_disk',
);

# Storage entries keyed by table index. Each record ends up with the fields
# type, type_txt, description, units, size, used and free; size, used and
# free are converted to bytes (raw value * units) for non-empty devices.
my %devices;
%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.25.2.3")};

# Table columns that are copied into the record unchanged.
my %storage_columns = (
  '3' => 'description',
  '4' => 'units',
  '5' => 'size',
  '6' => 'used',
);
foreach my $oid (sort keys %snmp_data) {
  my ($key, $id) = $oid =~ m/^1\.3\.6\.1\.2\.1\.25\.2\.3\.1\.(\d+)\.(\d+)$/;
  if(!defined($key)) {
    # Not a <column>.<index> entry of the table; ignore it.
    next;
  }
  if($key eq "2") {
    # The type is itself an OID; keep only its final numeric component.
    $snmp_data{$oid} =~ s/^1\.3\.6\.1\.2\.1\.25\.2\.1\.//;
    $devices{$id}{'type'} = $snmp_data{$oid};
    $devices{$id}{'type_txt'} = $device_types{$snmp_data{$oid}};
  }
  elsif(exists($storage_columns{$key})) {
    $devices{$id}{$storage_columns{$key}} = $snmp_data{$oid};
  }
}

my $is_windows = (defined($system) and $system eq "windows");
foreach my $device_id (sort keys %devices) {
  my $device = $devices{$device_id};
  # Fill in columns the agent did not return so that the arithmetic and
  # the comparisons further down never see undefined values.
  foreach my $field ('type', 'description') {
    if(!defined($device->{$field})) {
      $device->{$field} = "";
    }
  }
  foreach my $field ('units', 'size', 'used') {
    if(!defined($device->{$field})) {
      $device->{$field} = 0;
    }
  }
  if($is_windows) {
    # Counts that wrapped into negative numbers are mapped back by adding
    # 2**32. This is applied to "used" as well as "size" so that "free"
    # is not computed from a negative amount.
    foreach my $field ('size', 'used') {
      if($device->{$field} < 0) {
        $device->{$field} = 4294967296 - abs($device->{$field});
      }
    }
  }
  if($device->{'size'} == 0) {
    # Empty device: nothing to scale, but keep "free" defined.
    $device->{'free'} = 0;
    next;
  }
  $device->{'size'} = $device->{'size'} * $device->{'units'};
  $device->{'used'} = $device->{'used'} * $device->{'units'};
  $device->{'free'} = $device->{'size'} - $device->{'used'};
}

# Network interfaces keyed by interface index. Each record holds name,
# alias, description and the rx/tx byte, discard and error counters.
my %netifs;
%snmp_data = %{walk_oids($session, "1.3.6.1.2.1.2.2.1")};

# Table columns that are stored unchanged under the given field name.
my %netif_counter_columns = (
  '10' => 'rx',
  '13' => 'rx_discards',
  '14' => 'rx_errors',
  '16' => 'tx',
  '19' => 'tx_discards',
  '20' => 'tx_errors',
);
# Extra per-interface lookups: field name, OID prefix, fallback prefix.
my @netif_lookups = (
  ['name',  "1.3.6.1.2.1.31.1.1.1.1.",  "eth_"],
  ['alias', "1.3.6.1.2.1.31.1.1.1.18.", "Interface #"],
);
foreach my $oid (sort keys %snmp_data) {
  my ($key, $id) = $oid =~ m/^1\.3\.6\.1\.2\.1\.2\.2\.1\.(\d+)\.(\d+)$/;
  if(!defined($key)) {
    # Not a <column>.<index> entry of the table; ignore it.
    next;
  }
  if($key eq "1") {
    foreach my $lookup (@netif_lookups) {
      my ($field, $oid_prefix, $fallback_prefix) = @{$lookup};
      my $if_data = $session->get_request(-varbindlist => [$oid_prefix . $id]);
      # get_request yields undef on failure; treat that like a missing value.
      my $value = defined($if_data) ? $if_data->{$oid_prefix . $id} : undef;
      if(defined($value) and $value ne "") {
        $value =~ s/(\s+|\x00)$//;
        $netifs{$id}{$field} = $value;
      }
      else {
        $netifs{$id}{$field} = $fallback_prefix . $id;
      }
    }
  }
  elsif($key eq "2") {
    $netifs{$id}{'description'} = $snmp_data{$oid};
    # A description delivered as a "0x..." hex string is decoded to text.
    if($netifs{$id}{'description'} =~ m/^0x[0-9a-f]+$/) {
      $netifs{$id}{'description'} =~ s/^0x//;
      $netifs{$id}{'description'} = encode('ascii', pack("H*", uc($netifs{$id}{'description'})));
    }
    $netifs{$id}{'description'} =~ s/(\s+|\x00)$//;
  }
  elsif(exists($netif_counter_columns{$key})) {
    $netifs{$id}{$netif_counter_columns{$key}} = $snmp_data{$oid};
  }
}

# Complete records for which the agent did not return every column, so the
# output code never interpolates undefined values.
foreach my $id (keys %netifs) {
  my $netif = $netifs{$id};
  if(!defined($netif->{'name'})) {
    $netif->{'name'} = "eth_" . $id;
  }
  if(!defined($netif->{'alias'})) {
    $netif->{'alias'} = "Interface #" . $id;
  }
  if(!defined($netif->{'description'})) {
    $netif->{'description'} = $netif->{'name'};
  }
  foreach my $counter (values %netif_counter_columns) {
    if(!defined($netif->{$counter})) {
      # "U" is what this plugin prints for unknown values.
      $netif->{$counter} = "U";
    }
  }
}

# Close SNMP connection
$session->close();

# Handle config: print the graph definitions for every multigraph this
# plugin reports, then exit. Graph and field names must match the ones
# printed by the value section further down.
if(defined($ARGV[0]) and $ARGV[0] eq 'config') {
  # An undetected system is treated as "" so comparisons stay warning-free.
  my $os = defined($system) ? $system : "";

  # Returns the absolute warning level, the absolute critical level and the
  # colour of the "used" area for a device, given percentage thresholds.
  my $usage_levels = sub {
    my ($device, $warning_pct, $critical_pct) = @_;
    my $size = defined($device->{'size'}) ? $device->{'size'} : 0;
    my $used = defined($device->{'used'}) ? $device->{'used'} : 0;
    my $warning = $size * $warning_pct / 100;
    my $critical = $size * $critical_pct / 100;
    my $used_color = "0000ff";
    if($used > $warning) {
      $used_color = "ff9500";
    }
    if($used > $critical) {
      $used_color = "ff0000";
    }
    return ($warning, $critical, $used_color);
  };

  print <<EOF;
multigraph $graph_name\_uptime
host_name $host_name
graph_title System uptime
graph_args --base 1000 --lower-limit 0
graph_scale no
graph_vlabel days
graph_category system

uptime.label Uptime
uptime.type GAUGE
uptime.min 0
uptime.draw AREA
uptime.colour 00d000

multigraph $graph_name\_processes
host_name $host_name
graph_title Running processes
graph_args --base 1000 --lower-limit 0
graph_scale no
graph_category processes

processes.label Processes
processes.type GAUGE
processes.min 0
processes.draw AREA
processes.colour 0000a0

EOF

  # One field per CPU core, listed in numeric order.
  my $graph_order = "";
  foreach my $cpu_num (sort {$a <=> $b} keys %cpus) {
    $graph_order .= " cpu" . $cpu_num;
  }
  print <<EOF;
multigraph $graph_name\_cpu_usage
host_name $host_name
graph_title CPU usage
graph_args --base 1000 --lower-limit 0 --upper-limit 100
graph_order$graph_order
graph_scale no
graph_vlabel %
graph_category cpu

EOF
  foreach my $cpu_num (sort {$a <=> $b} keys %cpus) {
    print <<EOF;
cpu$cpu_num.label CPU core $cpu_num
cpu$cpu_num.type GAUGE
cpu$cpu_num.draw LINE1
cpu$cpu_num.min 0
cpu$cpu_num.max 100
cpu$cpu_num.warning $cpu_warning
cpu$cpu_num.critical $cpu_critical
EOF
    # The threshold line is attached to the first core only.
    if($cpu_num == 0) {
      print("cpu" . $cpu_num . ".line " . $cpu_warning . ":ffff00:Warning threshold\n");
    }
  }

  foreach my $device_id (sort keys %devices) {
    my $device = $devices{$device_id};
    my $type = defined($device->{'type'}) ? $device->{'type'} : "";

    # Type 2: physical memory.
    if($type eq "2") {
      my ($warning, $critical, $used_color) = $usage_levels->($device, $memory_warning, $memory_critical);
      print <<EOF;
multigraph $graph_name\_memory_usage
host_name $host_name
graph_title Memory usage
graph_order used free size
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category memory

free.label Free memory
free.type GAUGE
free.draw AREASTACK
free.colour 00ff00
used.label Used memory
used.type GAUGE
used.draw AREASTACK
used.colour $used_color
used.line $warning:ffff00:Warning threshold
used.warning $warning
used.critical $critical
size.label Memory size
size.type GAUGE
size.draw LINE1
size.colour 000000

EOF
    }

    # Type 3: virtual memory, graphed as swap.
    if($type eq "3") {
      my ($warning, $critical, $used_color) = $usage_levels->($device, $swap_warning, $swap_critical);
      print <<EOF;
multigraph $graph_name\_swap_usage
host_name $host_name
graph_title Swap usage
graph_order used free size
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category memory

free.label Free swap
free.type GAUGE
free.draw AREASTACK
free.colour 00ff00
used.label Used swap
used.type GAUGE
used.draw AREASTACK
used.colour $used_color
used.line $warning:ffff00:Warning threshold
used.warning $warning
used.critical $critical
size.label Swap size
size.type GAUGE
size.draw LINE1
size.colour 000000

EOF
    }

    # Type 4: fixed disk; one multigraph per disk.
    if($type eq "4") {
      my $description = defined($device->{'description'}) ? $device->{'description'} : "";
      if(!defined($device->{'size'}) or $device->{'size'} == 0) {
        next;
      }
      # Pseudo file systems are skipped on Linux only, exactly as the value
      # section does, so both sections describe the same set of graphs.
      if($os eq "linux" and $description =~ m!/(dev|proc|sys|run)!) {
        next;
      }
      my $disk;
      my $label;
      if($os eq "windows") {
        ($disk, $label) = $description =~ m/^([A-Z]):.*Label:(.*)\s+Serial/;
        if(!defined($disk)) {
          # No "Label: ... Serial" part: fall back to the drive letter alone,
          # which is all the value section uses for the graph name.
          ($disk) = $description =~ m/^([A-Z]):/;
          $label = "";
        }
        if(defined($disk)) {
          $label =~ s/\s+$//;
          if($label eq "") {
            $label = $disk . ":";
          }
          else {
            $label = $disk . ": (" . $label . ")";
          }
        }
      }
      elsif($os eq "linux" or $os eq "esxi") {
        # esxi is handled like linux here because the value section names
        # esxi disks this way; without it the graph name would be empty.
        if($description eq "/") {
          $disk = "root";
        }
        else {
          $disk = $description;
          $disk =~ s!^/!!g;
          $disk =~ s/[^a-zA-Z0-9]/_/g;
        }
        $label = $description;
      }
      elsif($os eq "routeros") {
        if($description eq "system disk") {
          $disk = "system";
        }
        else {
          $disk = $description;
          $disk =~ s!^/!!g;
          $disk =~ s/[^a-zA-Z0-9]/_/g;
        }
        $label = $description;
      }
      if(!defined($disk)) {
        # No naming rule applies (unknown system or unparsable description).
        next;
      }
      my ($warning, $critical, $used_color) = $usage_levels->($device, $disk_warning, $disk_critical);
      print <<EOF;
multigraph $graph_name\_disk_$disk
host_name $host_name
graph_title $label
graph_order used free size
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category storage

free.label Free
free.type GAUGE
free.draw AREASTACK
free.colour 00ff00
used.label Used
used.type GAUGE
used.draw AREASTACK
used.colour $used_color
used.line $warning:ffff00:Warning threshold
used.warning $warning
used.critical $critical
size.label Size
size.type GAUGE
size.draw LINE1
size.colour 000000

EOF
    }
  }

  # Two multigraphs (traffic and errors) per reported network interface.
  foreach my $netif_id (sort keys %netifs) {
    my $name = $netifs{$netif_id}{'name'};
    if(!defined($name)) {
      next;
    }
    my $description = defined($netifs{$netif_id}{'description'}) ? $netifs{$netif_id}{'description'} : $name;
    if($os eq "windows") {
      if($description =~ m/(Miniport|ISATAP|isatap|Loopback|RAS|Filter|Scheduler|Pseudo)/) {
        next;
      }
    }
    elsif($os eq "linux") {
      if($name eq "lo") {
        next;
      }
    }
    my $netif = $name;
    $netif =~ s/[^a-zA-Z0-9]/_/g;
    # host_name is emitted here as well, matching every other graph above.
    print <<EOF;

multigraph $graph_name\_$netif\_traffic
host_name $host_name
graph_title Traffic on $description
graph_order rx_bytes tx_bytes
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category netif

rx_bytes.label Incoming traffic
rx_bytes.type DERIVE
rx_bytes.min 0
rx_bytes.draw AREA
rx_bytes.colour 00ff00
tx_bytes.label Outgoing traffic
tx_bytes.type DERIVE
tx_bytes.min 0
tx_bytes.draw LINE1
tx_bytes.colour 0000ff

multigraph $graph_name\_$netif\_errors
host_name $host_name
graph_title Errors on $description
graph_order rx_errors tx_errors rx_discards tx_discards
graph_args --base 1000 --lower-limit 0
graph_scale no
graph_category netif

rx_errors.label RX errors
rx_errors.type DERIVE
rx_errors.min 0
rx_errors.draw AREASTACK
rx_errors.colour 0000ff
tx_errors.label TX errors
tx_errors.type DERIVE
tx_errors.min 0
tx_errors.draw AREASTACK
tx_errors.colour ff00ff
rx_discards.label RX discards
rx_discards.type DERIVE
rx_discards.min 0
rx_discards.draw AREASTACK
rx_discards.colour ff0000
tx_discards.label TX discards
tx_discards.type DERIVE
tx_discards.min 0
tx_discards.draw AREASTACK
tx_discards.colour ff9500
EOF
  }
  exit(0);
}

# Value output: print the current readings for every multigraph. Graph and
# field names must match the ones printed by the config section above.

# Maps a missing reading to "U", the marker this plugin uses for unknown.
my $value_or_unknown = sub {
  my ($value) = @_;
  return defined($value) ? $value : "U";
};
# An undetected system is treated as "" so comparisons stay warning-free.
my $values_os = defined($system) ? $system : "";

my $uptime_value = $value_or_unknown->($uptime);
my $processes_value = $value_or_unknown->($processes);
print <<EOF;
multigraph $graph_name\_uptime
uptime.value $uptime_value
multigraph $graph_name\_processes
processes.value $processes_value
multigraph $graph_name\_cpu_usage
EOF

foreach my $cpu_num (sort {$a <=> $b} keys %cpus) {
  print("cpu" . $cpu_num . ".value " . $value_or_unknown->($cpus{$cpu_num}) . "\n");
}

foreach my $device_id (sort keys %devices) {
  my $device = $devices{$device_id};
  my $type = defined($device->{'type'}) ? $device->{'type'} : "";
  my $free = $value_or_unknown->($device->{'free'});
  my $used = $value_or_unknown->($device->{'used'});
  my $size = $value_or_unknown->($device->{'size'});

  # Type 2: physical memory.
  if($type eq "2") {
    print <<EOF;
multigraph $graph_name\_memory_usage
free.value $free
used.value $used
size.value $size
EOF
  }

  # Type 3: virtual memory, graphed as swap.
  if($type eq "3") {
    print <<EOF;
multigraph $graph_name\_swap_usage
free.value $free
used.value $used
size.value $size
EOF
  }

  # Type 4: fixed disk; one multigraph per disk.
  if($type eq "4") {
    my $description = defined($device->{'description'}) ? $device->{'description'} : "";
    if(!defined($device->{'size'}) or $device->{'size'} == 0) {
      next;
    }
    if($values_os eq "linux" and $description =~ m!/(dev|proc|sys|run)!) {
      next;
    }
    my $disk;
    if($values_os eq "windows") {
      ($disk) = $description =~ m/^([A-Z]):/;
    }
    elsif($values_os eq "linux" or $values_os eq "esxi") {
      if($description eq "/") {
        $disk = "root";
      }
      else {
        $disk = $description;
        $disk =~ s!^/!!g;
        $disk =~ s/[^a-zA-Z0-9]/_/g;
      }
    }
    elsif($values_os eq "routeros") {
      if($description eq "system disk") {
        $disk = "system";
      }
      else {
        $disk = $description;
        $disk =~ s!^/!!g;
        $disk =~ s/[^a-zA-Z0-9]/_/g;
      }
    }
    if(!defined($disk)) {
      # No naming rule applies (unknown system or unparsable description);
      # printing would produce a multigraph with an empty disk name.
      next;
    }
    print <<EOF;
multigraph $graph_name\_disk_$disk
free.value $free
used.value $used
size.value $size
EOF
  }
}

foreach my $netif_id (sort keys %netifs) {
  my $record = $netifs{$netif_id};
  my $name = $record->{'name'};
  if(!defined($name)) {
    next;
  }
  my $description = defined($record->{'description'}) ? $record->{'description'} : $name;
  if($values_os eq "windows") {
    if($description =~ m/(Miniport|ISATAP|isatap|Loopback|RAS|Filter|Scheduler|Pseudo)/) {
      next;
    }
  }
  elsif($values_os eq "linux") {
    if($name eq "lo") {
      next;
    }
  }
  my $netif = $name;
  $netif =~ s/[^a-zA-Z0-9]/_/g;
  my $rx = $value_or_unknown->($record->{'rx'});
  my $tx = $value_or_unknown->($record->{'tx'});
  my $rx_errors = $value_or_unknown->($record->{'rx_errors'});
  my $tx_errors = $value_or_unknown->($record->{'tx_errors'});
  my $rx_discards = $value_or_unknown->($record->{'rx_discards'});
  my $tx_discards = $value_or_unknown->($record->{'tx_discards'});
  print <<EOF;
multigraph $graph_name\_$netif\_traffic
rx_bytes.value $rx
tx_bytes.value $tx
multigraph $graph_name\_$netif\_errors
rx_errors.value $rx_errors
tx_errors.value $tx_errors
rx_discards.value $rx_discards
tx_discards.value $tx_discards
EOF
}

exit(0);

# walk_oids($session, $search_oid)
#
# Walks the subtree below $search_oid and returns a reference to a hash
# that maps every OID found there to its value.
#
#   $session    - session object; the code calls version(),
#                 get_next_request(), get_bulk_request(), var_bind_names(),
#                 var_bind_types() and var_bind_list() on it
#   $search_oid - base OID of the subtree, in dotted notation
#
# A session reporting SNMP_VERSION_1 is walked one get-next request at a
# time; any other version uses get-bulk requests with 25 repetitions.
# The walk stops when a request fails, when an OID outside the subtree is
# returned, or when the returned OIDs stop increasing. Whatever was
# collected up to that point is returned; request errors are not reported.
#
# The sub is declared without a prototype: the previous "()" prototype
# contradicted the forward declaration at the top of the file and declared
# a sub that takes no arguments, although it is always called with two.
sub walk_oids {
  my ($session, $search_oid) = @_;
  my @args = (-varbindlist => [$search_oid]);
  my $last_oid = $search_oid;
  my %oids;

  if($session->version() == SNMP_VERSION_1) {
    while(defined($session->get_next_request(@args))) {
      my $oid = ($session->var_bind_names())[0];
      if(!defined($oid)) {
        last;
      }
      # An OID that does not increase would make the walk loop forever.
      if(oid_lex_cmp($last_oid, $oid) >= 0) {
        last;
      }
      if(!oid_base_match($search_oid, $oid)) {
        last;
      }
      $oids{$oid} = $session->var_bind_list()->{$oid};
      $last_oid = $oid;
      @args = (-varbindlist => [$last_oid]);
    }
  }
  else {
    push @args, -maxrepetitions => 25;
    GET_BULK: while(defined($session->get_bulk_request(@args))) {
      my @names = $session->var_bind_names();
      if(!scalar(@names)) {
        last GET_BULK;
      }
      my $types = $session->var_bind_types();
      my $values = $session->var_bind_list();
      for my $oid (@names) {
        # endOfMibView marks the end of the agent's data. It is not a
        # value, so it is not stored in the result.
        if($types->{$oid} == ENDOFMIBVIEW) {
          last GET_BULK;
        }
        if(oid_lex_cmp($last_oid, $oid) >= 0) {
          last GET_BULK;
        }
        if(!oid_base_match($search_oid, $oid)) {
          last GET_BULK;
        }
        $oids{$oid} = $values->{$oid};
        $last_oid = $oid;
      }
      @args = (-maxrepetitions => 25, -varbindlist => [$last_oid]);
    }
  }
  return \%oids;
}
