#!/usr/bin/perl
use strict;

# irc_report.pl - Version 1.32 (Oct 23rd, 1998)
#
# Perl script by Kajetan Hinner. This program is Mailware: every user should
# write to Kajetan@Hinner.com
#
# irc_report.pl analyzes logfiles of ircII and creates reports which can be
# used in Excel or SPSS for further analysis.
# The output of the following ircII commands is recognized:
#     /lusers   /list   /date
# To get the correct time of a specific channel-count output of /list,
# irc_report.pl syncs on
#     *%&MAGIC** $currtime() *%&MAGIC**
# and stores the time of this output.
#
# irc_report.pl was developed using Linux 2.1.x, ircII 4.4, Perl 5.004 and
# KDE 4. As editor for large files I recommend nedit.
#
# How this script works:
#   The script expects an ircII logfile as input, which is scanned line by
#   line. Certain outputs of ircII commands are recognized and their data is
#   stored, for example all output referring to dates (the /date command or
#   the line written after /set log on).
#   The printable date ($datestring) is stored in a hash keyed by its numeric
#   value (epoch seconds), which makes sorting easy and keeps the sort order
#   independent of the printed date format.
#   User/server/channel totals are stored in %data, the /list output (channel
#   names and population) in %channels.
#   At the end two files are written: "<input>.data" (totals per date) and
#   "<input>.chan" (users per channel per date).
#
# Possible console messages:
#   "Adopted! ..."   - the time wrapped from 23h to 00h and there was no
#                      other information to calculate the day; the day was
#                      advanced by one.
#   "Attention, problem with date which MUST be resolved" - look into your
#                      logfile and take care of the problem. Mostly caused by
#                      date outputs following each other too quickly.
#
# Author: Kajetan Hinner, University of Rostock, Sociology Dep.,
#         Kajetan@Hinner.com
#
# Notice: The aim was a working program which is reliable and easy to
# maintain, not state-of-the-art Perl which may be hard to read for others.

use Time::Local;
use Date::Manip;
# Date_Init("Language=German", "DateFormat=non-US");   # select date format

# Debug level:
#   1: print serious bugs (default)
#   2: print something (for those interested in the internal working)
#   3: print everything (for debugging)
my $debug = 2;
print ("Debug Mode On ($debug)\n") if ($debug);

my $zeilenno  = 0;      # line counter of the input file
my $idlecount = 0;      # counts the "Anti-Idle" messages (just for fun)
my $logcount  = 0;      # counts the "IRC log started" messages

my $datestring  = 0;    # current date and time as printed in the reports ("dd/mm/yy hh:mm")
my $dateval     = 0;    # the same moment as epoch seconds (timelocal() format)
my %dateh;              # $dateh{$dateval} = $datestring; sortable independent of the date format
my $date_quelle = "";   # source of the current date: "date-command", "IRCLOGSTART-DATE" or "Magic"

# Components of the current date. They are (re)set by kh_settime().
my $kh_monat   = 0;
my $kh_tag     = 0;
my $kh_jahr    = 98;    # ! Initialized with 1998. If your log starts in another year, change this
                        #   value (necessary because the "IRC log started" line prints no year).
my $kh_stunde  = 0;
my $kh_minuten = 0;
my $kh_tz      = 0;     # time zone; stored but not evaluated

# Remember the last hour/day seen in a MAGIC line. Necessary to detect day
# turnovers (hour changes from 23 to 0 without any other date information).
my $kh_stunde_sik = 0;
my $kh_tag_sik    = 0;

# Line numbers of the most recent occurrence of each kind of line. They are
# only recorded (bookkeeping for debugging), never evaluated.
my $sz_zeile        = 0;    # server time output (/date)
my $irclog_zeile    = 0;    # "IRC log started ..."
my $magic_zeile     = 0;    # MAGIC time stamp
my $there_are_zeile = 0;    # "There are ... users ..."

my %data;       # $data{$datestring}{users|services|servers|operators|unknown_connections|channels}
my %channels;   # $channels{$name}{$datestring} = user count; $channels{$name}{"sum"} = total over all dates
my %topics;     # $topics{$name}{$datestring} = topic

# Three-letter month names as used in the "IRC log started" line.
my %months3 = ("Jan", '01', "Feb", '02', "Mar", '03', "Apr", '04',
               "May", '05', "Jun", '06', "Jul", '07', "Aug", '08',
               "Sep", '09', "Oct", 10,   "Nov", 11,   "Dec", 12);

# Weekday alternatives used by the server time patterns.
my $wochentag_re = qr/Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday/;

# kh_settime($day, $month, $year, $hour, $minute, $timezone)
#
# Makes the given moment the "current date": updates the $kh_* variables,
# $datestring and $dateval, and registers the date in %dateh so that it shows
# up (sorted by time) in the reports.
#   $month is 1..12; $year may have two or four digits.
# Returns the printable $datestring.
# If the date is not a valid calendar date, a message is printed and the
# date is NOT registered in %dateh (timelocal() would otherwise abort the
# whole run).
sub kh_settime {
    ($kh_tag, $kh_monat, $kh_jahr, $kh_stunde, $kh_minuten, $kh_tz) = @_;

    if (!defined $kh_jahr || $kh_jahr !~ /\d/) {
        print ("Invalid date at line # $zeilenno \n");
    }

    # Note: $kh_monat - 1 because timelocal() expects months 0..11 while
    # this script works with 1..12. timelocal() croaks on impossible dates,
    # hence the eval.
    my $epoch = eval {
        timelocal (0, $kh_minuten, $kh_stunde, $kh_tag, $kh_monat - 1, $kh_jahr);
    };

    # Normalize the year to two digits for the report (1998 -> 98, 2000 -> 00).
    if (defined $kh_jahr && $kh_jahr =~ /^\d+$/ && $kh_jahr > 1900) {
        $kh_jahr %= 100;
    }
    my $jahr_2 = (defined $kh_jahr && $kh_jahr =~ /^\d+$/) ? sprintf ("%02d", $kh_jahr) : $kh_jahr;

    # To change the way dates are printed in the reports, change only the
    # following statement.
    $datestring = $kh_tag . "/" . $kh_monat . "/" . "$jahr_2" . " " . $kh_stunde . ":" . $kh_minuten;

    if (!defined $epoch) {
        print ("Big problem while parsing the date: Zeile $zeilenno, Datum: $datestring\n");
        return ($datestring);
    }

    $dateval = $epoch;
    $dateh{$dateval} = $datestring;     # the decisive line: enables sorting by date later on
    return ($datestring);
}

# kh_next_day($day, $month, $year)
#
# Returns (day, month, two-digit year) of the calendar day following the
# given one, or an empty list if the given date is not valid.
sub kh_next_day {
    my ($tag, $monat, $jahr) = @_;

    # Start at noon so that a daylight-saving shift cannot move the result
    # across a day boundary.
    my $mittag = eval { timelocal (0, 0, 12, $tag, $monat - 1, $jahr) };
    return unless defined $mittag;

    my @morgen = localtime ($mittag + 24 * 60 * 60);
    return ($morgen[3], $morgen[4] + 1, $morgen[5] % 100);
}

# ----------------- scan @ARGV

my $self = $0;      # name of this script
$self =~ s!^.*/!!;
if (scalar @ARGV < 1 || scalar @ARGV > 2) {
    die "Usage: perl $self datafile(.log) [offset]\n<offset> (in seconds) will be added to each server time output.\n";
}

my $filevar     = $ARGV[0];
my $offset_secs = 0;        # seconds to add to every server time output
if (scalar @ARGV > 1) {
    $offset_secs = $ARGV[1];
    if ($offset_secs !~ /^[-+]?\d+$/) { die "<offset> only accepts figures.\n"; }
}

my $channel_outfile = $filevar . ".chan";
my $date_outfile    = $filevar . ".data";

# ----------------- scan the logfile

open (my $log_fh, "<", $filevar) or die ("Could not open ".$filevar." $!\n");

while (my $zeile = <$log_fh>) {
    $zeilenno++;

    # First of all, look for date output from any source.

    # Server time output (/date), e.g.
    #   *** Goettingen.DE.EU.undernet.org 888015724 12 Saturday February 21 1998 -- 00: 01 +01:00
    #   *** Goettingen.DE.EU.undernet.org 895100340 -3 Thursday May 14 1998 -- 00: 59 +02:00
    #   *** TU-Ilmenau.DE Thursday February 19 1998 -- 04: 01 +01:00
    if ($zeile =~ /^\*\*\* (.*) (?:$wochentag_re) (.*) (\d+) (\d+) \-\- (\d+): (\d+) .*$/) {

        # Get rid of the server info: first try the form with epoch and
        # offset number in front of the weekday, then the plain form.
        my ($dateserverstring) = ($zeile =~ /\*\*\* \S* \d* -?\d{1,3} \S* (.*)$/);
        if (!defined $dateserverstring) {
            ($dateserverstring) = ($zeile =~ /\*\*\* \S* (?:$wochentag_re) (.*)$/);
        }

        if (!defined $dateserverstring) {
            # Looked like a date output, but in an unknown format.
            print ("Problem with Date '' in line $zeilenno. Date not used.\n");
            next;
        }

        print ("Found time output of IRCserver... \$Datestring is '$dateserverstring', line: $zeilenno\n") if ($debug > 2);

        # Time zones make this too complicated for manual parsing, so
        # Date::Manip is used. Check for 'February 18 1998 -- 19: 02 -08:00'.
        if ($dateserverstring =~ /\S* \d{1,2} \d{4} -- \d{1,2}: \d{1,2} .\d\d:\d\d$/) {
            print ("\$Dateserverstring to de-space '$dateserverstring'\n") if ($debug > 2);
            $dateserverstring =~ s/^(.*\:) (\d{1,2} .*)$/$1$2/;     # remove the space before the minute (ircII bug)
            print ("\$Dateserverstring after de-space '$dateserverstring'\n") if ($debug > 2);
        }

        my $date_epsecs = UnixDate ($dateserverstring, "%s");
        if (!$date_epsecs) {
            # the conversion did not work
            print ("Problem with Date '$dateserverstring' in line $zeilenno. Date not used.\n");
            next;
        }

        if ($offset_secs) {
            # Optional correction, introduced because of time zone problems.
            $date_epsecs += $offset_secs;
            $dateserverstring = ParseDateString ("epoch $date_epsecs");
        }

        my ($monat, $tag, $jahr, $stunde, $minute) =
            split (" ", UnixDate ($dateserverstring, "%m %d %Y %H %M"));
        print ("month: '$monat', day '$tag', year '$jahr' hour '$stunde', minute '$minute'\n") if ($debug > 2);

        $date_quelle = "date-command";      # source of the date: /date output of the server
        kh_settime ($tag, $monat, $jahr, $stunde, $minute, undef);   # the time zone is not extracted
        $sz_zeile = $zeilenno;
        next;
    }

    # Date output written when the IRC log was started, e.g.
    #   IRC log started Sat Mar  7 12:00
    # (single-digit days are padded with an extra space)
    if (my ($monat, $tag, $stunde, $minute) =
            ($zeile =~ /^IRC log started (?:Sun|Mon|Tue|Wed|Thu|Fri|Sat) (\S+)\s+(\d+)\s+(\d+):(\d+)$/)) {
        $logcount++;
        $date_quelle = "IRCLOGSTART-DATE";
        # The line carries no year, so the current $kh_jahr is reused.
        kh_settime ($tag, $months3{$monat}, $kh_jahr, $stunde, $minute, "DST");
        $irclog_zeile = $zeilenno;
        next;
    }

    # Note: on IRCnet the motd additionally prints "*** - 25/3/1998 20:04"
    # when the log is started. This is deliberately not evaluated.

    # MAGIC time stamp, e.g. " *%&MAGIC** 02:00:07 *%&MAGIC**".
    # It carries only the time of day; the day is taken from the last date.
    if (my ($hh, $mm) =
            ($zeile =~ /^\s*\*\%\&MAGIC\*\*\s+(\d+):(\d+):\d+\s+\*\%\&MAGIC\*\*/)) {

        # If the previous date also came from a MAGIC line at 23h and it is
        # now 0h, the day has changed without us being told: advance it.
        if (($kh_stunde == 23) && ($hh == 0) && ($kh_tag == $kh_tag_sik) && ($date_quelle eq "Magic")) {
            print ("Adopted! $zeilenno, $date_quelle und datestring $datestring\n");
            my ($neu_tag, $neu_monat, $neu_jahr) = kh_next_day ($kh_tag, $kh_monat, $kh_jahr);
            if (!defined $neu_tag) {
                print ("Attention, problem with date which MUST be resolved, in $zeilenno\n");
                next;       # skip the rest
            }
            ($kh_tag, $kh_monat, $kh_jahr) = ($neu_tag, $neu_monat, $neu_jahr);
        }

        kh_settime ($kh_tag, $kh_monat, $kh_jahr, $hh, $mm, $kh_tz);
        $kh_tag_sik    = $kh_tag;
        $kh_stunde_sik = $kh_stunde;
        $magic_zeile   = $zeilenno;
        $date_quelle   = "Magic";
        next;
    }

    # From here on user counts are evaluated. Everything is stored under
    # the current $datestring.

    # "*** There are 7685 users and 34 services on 62 servers"
    if (my ($users, $services, $servers) =
            ($zeile =~ /^\*\*\* There are (\d+) users and (\d+) services on (\d+) servers/)) {
        $data{$datestring}{"users"}    = $users;
        $data{$datestring}{"services"} = $services;
        $data{$datestring}{"servers"}  = $servers;
        print ("Found: $users users, $services services and $servers servers in line $zeilenno\n") if ($debug > 2);
        $there_are_zeile = $zeilenno;
        next;
    }

    # Same thing with "invisible" (common on Undernet), also with "plus"
    # instead of "and" (found on EFnet):
    #   "*** There are 10566 users and 9654 invisible on 36 servers"
    #   "*** There are 5813 users plus 17797 invisible on 66 servers"
    if (my ($users, $invisible, $servers) =
            ($zeile =~ /^\*\*\* There are (\d+) users (?:and|plus) (\d+) invisible on (\d+) servers/)) {
        $data{$datestring}{"users"}   = $users + $invisible;
        $data{$datestring}{"servers"} = $servers;
        print ("Found: $users users, $invisible invisible and $servers servers in line $zeilenno\n") if ($debug > 2);
        $there_are_zeile = $zeilenno;
        next;
    }

    # "*** Current global users: 22846 Max: 25161" (the maximum is ignored)
    if (my ($users) = ($zeile =~ /^\*\*\* Current global users: +(\d+) +Max: \d+$/)) {
        $data{$datestring}{"users"} = $users;
        $there_are_zeile = $zeilenno;
        next;
    }

    if ($zeile =~ /\*\*\* There are (\d+) operators online/) {
        $data{$datestring}{"operators"} = $1;
        next;
    }
    if ($zeile =~ /(\d+) unknown connections/) {
        $data{$datestring}{"unknown_connections"} = $1;
        next;
    }
    if ($zeile =~ /(\d+) channels have been formed/) {
        $data{$datestring}{"channels"} = $1;
        next;
    }
    # Not counted yet:
    #   "*** This server has N clients, N services and N servers connected"

    # /list output: "*** #channel  users  topic"
    if (my ($channel, $anzahl, $topic) = ($zeile =~ /^\*\*\* \#(\S+)\s+(\d+)\s+(.+)$/)) {
        $channel = lc ("$channel");     # lower case, to count each channel just once
        $channels{$channel}{$datestring} = $anzahl;
        $channels{$channel}{"sum"}      += $anzahl;
        $topics{$channel}{$datestring}   = $topic;
        next;
    }

    # "-> *nick* Anti-Idle": the ircII script sends itself a message to
    # avoid an idle kick from the server.
    if ($zeile =~ /^-\> \*\S+\* Anti-Idle$/) {
        $idlecount++;
        next;
    }
}
close ($log_fh);

print ("Number of scanned lines: $zeilenno\n");
print ("IRC-Log-started count: $logcount\n");
print ("Idle-Messages $idlecount\n");
print ("\n");

# All registered dates in chronological order (numeric sort of epoch seconds).
my @zeitpunkte = sort { $a <=> $b } keys %dateh;

# ----------------- output file 1: totals (users, channels, ...) per date

my @felder = ("users", "channels", "services", "servers");    # column order of the report
my %summe;          # sum of all known values per column
my %anzahl_werte;   # number of known values per column
foreach my $feld (@felder) {
    $summe{$feld}        = 0;
    $anzahl_werte{$feld} = 0;
}
my $counter = 0;    # number of dates for which any totals were found

open (my $date_fh, ">", $date_outfile) or die "Can't open $date_outfile : $!";
(print {$date_fh} "Datum \tUsers\tChannels\tServices\tServers\t\n") or die "Can't write to $date_outfile: $!";

foreach my $epoch (@zeitpunkte) {
    my $datum = $dateh{$epoch};
    next unless (exists $data{$datum});
    $counter++;

    my @spalten;
    foreach my $feld (@felder) {
        my $wert = $data{$datum}{$feld};
        if (defined $wert) {
            $summe{$feld} += $wert;
            $anzahl_werte{$feld}++;
        }
        else {
            $wert = "U";        # "U" marks a value which was not found in the log
        }
        push (@spalten, $wert);
    }
    (print {$date_fh} join ("\t", $datum, @spalten) . " \n") or die "Can't write to $date_outfile: $!";
}
close ($date_fh) or die "Can't write to $date_outfile: $!";

if ($counter) {
    # Some statistics for the console. Every mean is taken over the dates
    # for which the respective value is known.
    my %mittel;
    foreach my $feld (@felder) {
        $mittel{$feld} = $anzahl_werte{$feld} ? $summe{$feld} / $anzahl_werte{$feld} : 0;
    }
    printf ("Durchschnittswerte: Users: %d Channels: %d, Services: %d, Servers: %d \n",
            $mittel{"users"}, $mittel{"channels"}, $mittel{"services"}, $mittel{"servers"});
}

# ----------------- output file 2: users in every channel for every date

# Channels ordered by their total population, largest first; ties are
# broken by name so that header and data rows always use the same columns.
my @channel_order = sort {
    $channels{$b}{"sum"} <=> $channels{$a}{"sum"} or $a cmp $b
} keys %channels;

open (my $chan_fh, ">", $channel_outfile) or die "Kann $channel_outfile nicht oeffnen: $!";

my $kopfzeile = " Date ";   # first line: channel names
my $sumline   = " Sum ";    # second line: summed-up channel values
foreach my $name (@channel_order) {
    $kopfzeile .= "$name ";
    $sumline   .= $channels{$name}{"sum"} . " ";
}
(print {$chan_fh} "$kopfzeile\n$sumline\n") or die "Can't write to $channel_outfile: $!";

my $no_counter = 100;       # how many entries (dates) should be printed?
foreach my $epoch (@zeitpunkte) {
    my $datum = $dateh{$epoch};

    # All channel counts of this date are concatenated into one string.
    my $werte = "";
    foreach my $name (@channel_order) {
        my $n = $channels{$name}{$datum};
        $werte .= (defined $n ? $n : "") . " ";
    }

    # Only whitespace means: there was a date output for this date, but no
    # corresponding channel count. Such dates are not written.
    if ($werte =~ /\S/) {
        (print {$chan_fh} "$datum $werte\n") or die "Kann nicht auf $channel_outfile schreiben: $!";
        $no_counter--;
    }
    last if (!$no_counter);
}
close ($chan_fh) or die "Kann nicht auf $channel_outfile schreiben: $!";