Silicon Goblin Technologies Home
Services
Policies
Legal/copyrights
Contact
Links
Adult Literacy
Samples
Presentations

Perl Sample

This is a bit of sample Perl code that demonstrates our coding style. It's a parser for errorlogs for our hosted websites, which runs every night, examines the site's web error log, and extract and summarizes important bits of information to be mailed to the site's webmaster, to help them stay up on potential problems and nip them in the bud. Each site has a configuration file with settings and options specific to its own needs. (This code is still in development.)

#!/usr/bin/perl -w # # errorlog_monitor.pl # # By Steve Linberg, Silicon Goblin Technologies # Released under the Perl Artistic License # steve@silicongoblin.com use strict; use Fcntl qw(:flock); use Mail::Mailer; use Getopt::Long; use Data::Dumper; use File::Basename; use Date::Manip; my $VERSION = '0.1'; my $working_dir = q(/opt/errorlog_monitor/var); my $conf_dir = q(/opt/errorlog_monitor/conf); my @output_lines = (); my $smtp_host = 'smtp.foo.com'; my %opts = ( debug => 0, email => 1, print => 0, test => 0, ); Getopt::Long::Configure('bundling'); my $options_okay = GetOptions( \%opts, 'debug!', 'email!', 'print!', 'test!', 'date:s', 'version' => \&version, 'usage' => \&usage, 'dumpargs', ); usage() unless $options_okay; # Lock the semaphore file before we begin. This ensures that only one # copy of this script is running at a time. It also spares us the # headache of locking many different files and managing each lock's # state. Whoever owns the semaphore file gets to do anything. # Credit for this technique goes directly to Mark-Jason Dominus, who # articulated it very completely on the phl.pl list. Thanks, Mark-Jason. unless (open(SEMAPHORE, qq(>$working_dir/sem.LCK))) { die qq(Error: cannot open semaphore file [$working_dir/sem.LCK] ($!)); } flock SEMAPHORE, LOCK_EX; # lock the semaphore file, or wait. # Read the configuration files. $opts{debug} && print qq(Reading configuration files\n); opendir (DIR, $conf_dir) || die "Can't open configuration path '$conf_dir' ($!)"; my %config_files = map { $_ => 1 } grep m/\.conf$/, readdir(DIR); closedir DIR; # Get yesterday's date by default, or the speciified date. my ($day, $month, $year) = (); if ($opts{'date'}) { my $date = ParseDate($opts{'date'}); if (!$date) { die qq(Bad date string: $opts{'date'}); } else { ($year, $month, $day) = UnixDate($date, '%Y', '%f', '%e'); } } else { ($day, $month, $year) = (localtime(time-24*60*60))[3,4,5]; $month++; $year += 1900; } my $date_month_text = ('NULL', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' )[$month]; $month = sprintf(q(%02d), $month); my $zday = sprintf(q(%02d), $day); my $grep_pattern = qq(^\\[\\S+\\s+$date_month_text\\s+$day\\s+); my $grep_access_pattern = qq(\\[$zday\/$date_month_text\/$year:\\d\\d:\\d\\d:\\d\\d\\s\\S+\\]); my $date_error_logfile = $year . $month . '-error.log'; my $date_access_logfile = $year . $month . '-access.log'; my $printable_date = $year . '-' . $month . '-' . $zday; my %default_settings = ( ignore => [ 'favicon\.ico', 'mod_gzip: TRANSMIT_ERROR', 'MSOffice\/cltreq\.asp', '\/_vti_bin\/owssvr\.dll', ], local_regex => '/^$/', ); # Build a hash out of @ARGV, if specified, and strip out all but the filenames. my %only_process = map { basename($_) => 1 } @ARGV; if (keys %only_process) { my $all_specified_configs_exist = 1; for (keys %only_process) { if (!exists $config_files{$_}) { $all_specified_configs_exist = 0; print qq(** Configuration [$_] not found in [$conf_dir]\n); } } unless ($all_specified_configs_exist) { exit_prog(); } %config_files = %only_process; # only process these files. } # Process the configuration files. if ($opts{dumpargs}) { print q(Debug: ) . printopt('debug') . "\n"; print q(Email: ) . printopt('email') . "\n"; print q(Print: ) . printopt('print') . "\n"; print q(Test: ) . printopt('test') . "\n"; print q(Date: ) . $printable_date . "\n"; print qq(Configs:\n) . join("\n", map qq( $_), sort keys %config_files) . "\n"; exit_prog(); } CONFIGFILE: for my $config_file (sort keys %config_files) { (my $domain = $config_file) =~ s/\.conf$//o; $opts{debug} && print qq(\nProcessing config file $config_file\n\n); my $file = $conf_dir . '/' . $config_file; open (FILE, $file) or die "Can't open config file [$file]: $!"; my @lines = <FILE>; close FILE or die "Can't close config file [$file]: $!"; my %config = %default_settings; for (@lines) { chomp; next unless /^.+/; next if /^#/; /^(\S+)\s+(\S+)\s*(.*)$/; my ($directive, $arg1, $arg2) = (lc($1),$2,$3); $opts{debug} && print qq(Directive: [$directive], arg1 = [$arg1], arg2 = [$arg2]\n); # Install the directives in the config. if ($directive eq 'email') { push @{$config{email}}, $arg1; } elsif ($directive eq 'logfile_dir') { $config{logfile_dir} = $arg1; } elsif ($directive eq 'logfile_format') { $config{logfile_format} = $arg1; } elsif ($directive eq 'custom') { push @{$config{custom}}, [$arg1, $arg2]; } elsif ($directive eq 'ignore') { push @{$config{ignore}}, $arg1; } elsif ($directive eq 'local_regex') { $config{local_regex} = $arg1; } elsif ($directive) { print qq(** UNKNOWN DIRECTIVE [$directive], skipping domain...\n); next CONFIGFILE; } } if ($opts{test}) { print qq(Config hash for domain:\n); print Dumper(\%config); } my $logfile = $config{logfile_dir} . '/' . $date_error_logfile; $opts{debug} && print qq(Logfile for config $config_file is $logfile\n); if (!-e $logfile) { $opts{debug} && print qq(Logfile $logfile not found, skipping domain...\n); next CONFIGFILE; } next CONFIGFILE if $opts{test}; $opts{debug} && print qq(Grepping $logfile for $grep_pattern\n); # Make this more robust: read lines starting from first match, ending at first match of following day (if found) or eof (if not) open (LOGFILE, $logfile) or die qq(Can\'t open logfile [$logfile]: $!); my @logfile_lines = grep {/$grep_pattern/} <LOGFILE>; close LOGFILE or die qq(Can\'t close logfile [$logfile]: $!); my $matched_lines = scalar @logfile_lines; $opts{debug} && print qq(Matched $matched_lines lines.\n); my %custom_lines = (); my %lines_to_report = (); my %errors = (); @output_lines = (); my $errors_404_count = 0; LOGFILE_LINE: for my $line (@logfile_lines) { # Check the patterns to be ignored. for my $ignore_pattern (@{$config{ignore}}) { if ($line =~ m/$ignore_pattern/) { next LOGFILE_LINE; } } # Check the custom lines to match. for my $custom_pattern (@{$config{custom}}) { my ($pattern, $message) = @$custom_pattern; if ($line =~ m/$pattern/) { $custom_lines{$pattern}++; next LOGFILE_LINE; } } # Anything that's left, extract the message from the line (if possible). if ($line =~ m/\[error\]\s\[client.*?\]\s(.*)$/) { my $message = $1; $opts{debug} && print qq(analyzing [$message]...\n); if ($message =~ /^File does not exist: (.*)/) { # $errors{404}{$1}++; } else { $lines_to_report{$message}++; } } else { # If we can't extract any message, just log the whole line. $lines_to_report{$line}++; } } # Check the access log for 404s. $logfile = $config{logfile_dir} . '/' . $date_access_logfile; if (-e $logfile) { $opts{debug} && print qq(Grepping $logfile for $grep_access_pattern\n); open (LOGFILE, $logfile) or die qq(Can\'t open logfile [$logfile]: $!); @logfile_lines = grep {/$grep_access_pattern/} <LOGFILE>; close LOGFILE or die qq(Can\'t close logfile [$logfile]: $!); $matched_lines = scalar @logfile_lines; $opts{debug} && print qq(Matched $matched_lines lines.\n); ACCESS_LOGFILE_LINE: for my $line (@logfile_lines) { # Check the patterns to be ignored. for my $ignore_pattern (@{$config{ignore}}) { if ($line =~ m/$ignore_pattern/) { next ACCESS_LOGFILE_LINE; } } $line =~ m/^(\S+)\s # requestor (\S+)\s # ? (\S+)\s # ? \[(.*?)\]\s # time "(.*?)"\s # URL (\d+)\s # result (\d+)\s # bytes "(.*?)"\s # referrer "(.*?)"/x # user agent ; my ($req, $foo1, $foo2, $time, $url, $result, $bytes, $referrer, $ua) = ($1,$2,$3,$4,$5,$6,$7,$8,$9); next unless $result && $result eq 404; $url =~ s/^(GET|POST)\s*//; $url =~ s/\s+HTTP\/.*?$//; $opts{debug} && print qq(404 for [$url] from [$referrer]\n); $errors_404_count++; ;# total all 404 errors. $errors{404}{all_referrers}{$url}{total}++; # total all 404s for this url. $errors{404}{all_referrers}{$url}{$referrer}++; # total all 404s for this url from each referrer. # Now further subdivide the 404s into three groups, by referrers: # 1. No referrer / direct link (where referrer = "-") # All that can really be done here is decide whether to create # a rewrite_rule to handle it. # 2. Non-local referrer (where referrer doesn't match # $config{local_regex} regex) # This is a bad external link - the website could be notified, # and/or a rewrite rule could be added. # 3. Local referrer # This is a bad internal link that should be fixed. if ($referrer eq '-') { $errors{404}{no_referrers_total}++; $errors{404}{no_referrers}{$url}{total}++; $errors{404}{no_referrers}{$url}{referrers}{'[NO REFERRER]'}++; } elsif ($referrer =~ m/$config{local_regex}/) { $errors{404}{local_referrers_total}++; $errors{404}{local_referrers}{$url}{total}++; $errors{404}{local_referrers}{$url}{referrers}{$referrer}++; } else { $errors{404}{external_referrers_total}++; $errors{404}{external_referrers}{$url}{total}++; $errors{404}{external_referrers}{$url}{referrers}{$referrer}++; } } } if (keys %custom_lines) { push_line(qq(Custom lines matched:\n)); while (my ($k, $v) = each %custom_lines) { my $pl = $v == 1 ? '' : 's'; push_line(qq(* pattern [$k]: $v time$pl\n)); } } if (keys %lines_to_report) { push_line(qq(Standard lines matched:\n)); while (my ($k, $v) = each %lines_to_report) { my $pl = $v == 1 ? '' : 's'; push_line(qq([$k]: $v time$pl\n)); } } if (keys %errors) { if ($errors_404_count) { my $pl = $errors_404_count == 1 ? '' : 's'; push_line(qq($errors_404_count total file not found error$pl (404)\n\n)); my $hr_404s = $errors{404}; if ($hr_404s->{local_referrers_total}) { my $pl = $hr_404s->{local_referrers_total} == 1 ? '' : 's'; push_line(qq($hr_404s->{local_referrers_total} local referrer error$pl:\n)); push_404s($hr_404s->{local_referrers}); } if ($hr_404s->{external_referrers_total}) { my $pl = $hr_404s->{external_referrers_total} == 1 ? '' : 's'; push_line(qq($hr_404s->{external_referrers_total} external referrer error$pl:\n)); push_404s($hr_404s->{external_referrers}); } if ($hr_404s->{no_referrers_total}) { my $pl = $hr_404s->{no_referrers_total} == 1 ? '' : 's'; push_line(qq($hr_404s->{no_referrers_total} direct link error$pl (no referrer):\n)); push_404s($hr_404s->{no_referrers}, 1); # 0 = suppress referrer print } } } if (@output_lines) { my $body = qq( This message was generated by the error scanner. Do not reply. @output_lines ); print @output_lines if $opts{print}; if ($opts{email}) { for my $address (@{$config{email}}) { my $mailer = new Mail::Mailer 'smtp', Server => $smtp_host; $mailer->open({ From => 'Errorlog Monitor <steve@silicongoblin.com>', To => $address, Subject => qq(Errorlog report for $domain - $printable_date), }) or die "Can't open: $!\n"; print $mailer qq(** Do not reply to this message ** It was generated by the error monitor at Silicon Goblin Technologies. Questions or problems: contact Steve at <steve\@silicongoblin.com>. @output_lines ); $mailer->close(); } } } } sub push_line { my $line = shift; push @output_lines, $line; } sub push_404s { my $hr_urls = shift; my $suppress_referrers = shift || 0; for my $url (sort { $hr_urls->{$b}{total} <=> $hr_urls->{$a}{total} } keys %{$hr_urls} ) { my $hr_url = $hr_urls->{$url}; my @referrer_info = map { my $printable_referrer = $_ eq '-' ? 'direct link' : $_; [$hr_url->{total}, $printable_referrer] } keys %{$hr_url->{referrers}}; push_line(qq([$hr_url->{total}] $url\n)); if (!$suppress_referrers) { for my $ar_info (sort { $b->[0] <=> $a->[0] } @referrer_info) { my ($total, $referrer) = @$ar_info; my $total_verbiage = $total == 1 ? '' : qq([$total] ); my $referrer_verbiage = $suppress_referrers ? '' : $referrer; push_line(qq(\\-- $total_verbiage$referrer_verbiage\n)); } } } push_line("\n"); } sub usage { print <<USAGE_END; Usage: $0 [--[no](debug | print | email | test)] [--date <date>] [--dumpargs] [conf [...]] USAGE_END exit_prog(); } sub version { print qq($0 version $VERSION\n); exit_prog(); } sub printopt { my $opt = shift; return (exists $opts{$opt} && $opts{$opt}) ? 'On' : 'Off' } sub exit_prog { print qq(Exiting.\n); exit; }