Software Freedom Law Center

root/trunk/antimatter/tim/Scripts/irc-log-convert.plx

Revision 53, 13.9 kB (checked in by bkuhn, 9 months ago)
  • Added SFLC's internally developed tim bot released under AGPLv3
  • Property svn:executable set to
Line 
1 #!/usr/bin/perl
2 # Copyright (C) 2006, 2007, 2008   Software Freedom Law Center, Inc.
3 #  Author: Bradley M. Kuhn <bkuhn@softwarefreedom.org>
4 #
5 #  This software gives you freedom; it is licensed to you under version
6 #  3 of the GNU Affero General Public License.
7 #
8 #  This software is distributed WITHOUT ANY WARRANTY, without even the
9 #  implied warranties of MERCHANTABILITY and FITNESS FOR A PARTICULAR
10 #  PURPOSE.  See the GNU Affero General Public License for further
11 #  details.
12 #
13 # You should have received a copy of the GNU Affero General Public
14 # License, version 3 along with this software.  If not, see
15 # <http://www.gnu.org/licenses/>.
16
17 use strict;
18 use warnings;
19
use Date::Manip;
use IO::File;

use HTML::CalendarMonth;
use HTML::AsSubs;
use HTML::Entities;
use URI::Find::Schemeless;
26
27 my $BASE = "/var/www/intranet/irclogs";
28 my $URL_BASE = 'http://www.example.org/irclogs';
29 my $LOG_DIR = "/home/ircbot/channelLogs";
30 my $LAST_TOPIC;
31
32 my $BOT_RE = '(?:log|tem|tim)';
33
34 my $MAIN_CHANNEL = 'mainchannel';
35
36 my %NICKS;
37 my %TOPICS;
38
39 ###############################################################################
40
=over 4

=item Commify

Takes one argument, typically a sequence of digits, and returns a copy
with commas inserted between each group of three digits so that the
number is easier for a human to read.  Digits after a decimal point are
left untouched -- if you want only two decimal places (e.g., for a
dollar amount), format the value with sprintf before passing it in.

=back

=cut

sub Commify ($) {
    my ($number) = @_;

    # Work on the reversed string so that groups of three can be counted
    # from the least-significant digit using a simple left-to-right scan.
    my $backwards = scalar reverse($number);

    # A group gets a comma only when another digit follows it and the
    # remaining run of digits is not the fractional part (which, in the
    # reversed string, would be terminated by the decimal point).
    $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $commified = reverse $backwards;
    return $commified;
}
56 ###############################################################################
# YearHeader($yearFH, $channel, $date, $prevDate)
#
# Writes the opening HTML of a per-year calendar page to $yearFH.  The year
# shown in the title and heading is taken from $date.  When $prevDate is
# defined, a "Back to <year>" link pointing at that earlier year's page
# (under $URL_BASE/$channel/) is written right after the header.
sub YearHeader ($$$$) {
  my($yearFH, $channel, $date, $prevDate) = @_;

  my $thisYear = UnixDate($date, "%Y");

  my $pageTop = <<PAGE_TOP;
<!DOCTYPE html
        PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
<title>#$channel in $thisYear - IRC Logs</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>
<center><h1>#$channel IRC Log in $thisYear</h1></center>
<hr/>
PAGE_TOP
  print {$yearFH} $pageTop;

  # Only years after the first one on record get a link backwards.
  if (defined $prevDate) {
    my $earlierYear = UnixDate($prevDate, "%Y");
    my $backLink =
      "<p align=\"left\"><a href=\"$URL_BASE/$channel/${earlierYear}.html\">\n"
        . "Back to $earlierYear</a></p>\n";
    print {$yearFH} $backLink;
  }
}
81 ###############################################################################
# FinishYear($yearFH, $year, $nextYear, $channel)
#
# Writes the closing part of a per-year page to $yearFH:
#   * a table of nicknames and the number of lines each spoke in $year on
#     $channel (read from %NICKS),
#   * a table of topic lines and the number of days each was set in $year
#     on $channel (read from %TOPICS),
#   * a "Proceed to <nextYear>" link when $nextYear is defined,
#   * the search form and the closing </body></html>.
#
# Keys of %TOPICS are written as-is because DoDay stores them already
# HTML-encoded (and with links marked up); nicknames are stored raw and are
# therefore encoded here.
sub FinishYear ($$$$) {
  my($yearFH, $year, $nextYear, $channel) = @_;

  print $yearFH "<hr width=\"80%\"/>\n",
    "<center><h3>Interesting Statistics for $year</h3></center>\n",
    "<table><tr><th>Nickname</th><th>Lines Spoken</th></tr>\n";
  foreach my $nick (sort keys %NICKS) {
    my $spoken = $NICKS{$nick}{$year}{$channel};
    next unless defined $spoken and $spoken > 0;
    print $yearFH "<tr><td>", encode_entities($nick), "</td><td>",
      Commify($spoken), "</td></tr>\n";
  }
  print $yearFH "</table><hr width=\"50%\"/><table><tr><th>Topic Line</th><th>Days Set In Year</th></tr>\n";
  foreach my $topic (sort keys %TOPICS) {
    my $daysSet = $TOPICS{$topic}{$year}{$channel};
    next unless defined $daysSet and $daysSet > 0;
    print $yearFH "<tr><td>$topic</td><td>",
      Commify($daysSet), "</td></tr>\n";
  }
  print $yearFH "</table>\n";
  print $yearFH
    "<p align=\"right\"><a href=\"$URL_BASE/$channel/${nextYear}.html\">",
      "Proceed to $nextYear</a></p>\n"
        if defined $nextYear;

  # The topic table was already closed above, so the footer must not emit a
  # second </table>.
  print $yearFH <<END_YEAR;
<hr width="80%"/>
<form name="ircForm" method="get" action="https://www.example.org/search">
<p>
<strong>Search All IRC Logs:</strong>
<input type="text" name="query" size="30">
<input type="submit" name="submit" value="Search!">
<input type="hidden" name="idxname" value="irclogs">
</p>
</form>

<hr/>
<p>Brought to you by your friendly neighborhood Logger Bot.</p>

</body></html>

END_YEAR

}
126 ###############################################################################
{
  # State shared between DoDay (which opens a day page) and FinishPrevDay
  # (which closes it later, once the date of the following page is known):
  #   $dayFH       - IO::File handle of the day page currently open, if any
  #   $dayFilename - path of that page
  #   $fileDate    - date of that page, used to set the file's timestamp
  my($dayFH, $dayFilename, $fileDate);

  # Finder that rewrites every URL it locates in a string into an HTML
  # anchor.  It is always applied to text that is already HTML-encoded.
  my $urlEncoder = URI::Find::Schemeless->new(
                    sub {
                      my($url, $text) = @_;
                      return "<a href=\"" . $url->abs . "\">$text</a>";
                    });

# FinishPrevDay($nextDate, $channel)
#
# Closes the day page left open by the most recent DoDay call, if there is
# one: ends the log table, adds a "Proceed to" link to the page for
# $nextDate (skipped when $nextDate is undef), writes the search form and
# page footer, closes the file, and sets the file's timestamp from the
# page's date.  Always clears the shared handle and filename afterwards.
sub FinishPrevDay($$) {
  my ($nextDate, $channel) = @_;

  if (defined $dayFH) {
    print $dayFH "</table>\n";

    print $dayFH "<hr width=\"50%\">",
      "<p align=\"right\"><a href=\"$URL_BASE/$channel/",
        UnixDate($nextDate, "%Y-%m-%d.html"), "\">Proceed to ",
          UnixDate($nextDate, "%A %e %B %Y"), "</a></p>\n"
            if defined $nextDate;

    # The log table was closed above; the footer must not close it again.
    print $dayFH <<END_DAY;
<hr width="80%"/>
<form name="ircForm" method="get" action="https://www.example.org/search">
<p>
<strong>Search All IRC Logs:</strong>
<input type="text" name="query" size="30">
<input type="submit" name="submit" value="Search!">
<input type="hidden" name="idxname" value="irclogs">
</p>
</form>

<hr/>
<p>Brought to you by your friendly neighborhood Logger Bot.</p>

</body></html>

END_DAY

    $dayFH->close()
      or warn "error closing $dayFilename: $!\n";

    # List-form system() bypasses the shell, so characters in the channel
    # directory name cannot be interpreted as shell syntax.
    system('/bin/touch', '-t',
           UnixDate($fileDate, "%Y%m%d%H%M.%S"), $dayFilename) == 0
      or warn "unable to set timestamp on $dayFilename (status $?)\n";
  }
  $dayFH = $dayFilename = undef;
}

# DoDay($date, $lines, $channel, $fileBase, $previousPageDate)
#
# Starts the HTML page "$fileBase/YYYY-MM-DD.html" for $date and writes the
# header, the day's topic(s) and one table row per entry of @$lines.  The
# page is deliberately left open; FinishPrevDay completes it.
#
#   $lines            - array ref of raw log lines, each starting with a
#                       timestamp token; the timestamp is stripped from the
#                       caller's strings in place
#   $previousPageDate - date of the preceding day page, or undef for none
#
# Side effects: updates the per-year counters in %NICKS and %TOPICS and
# remembers the last topic seen in $LAST_TOPIC.  Dies if the page cannot be
# opened for writing.
sub DoDay ($$$$$) {
  my($date, $lines, $channel, $fileBase, $previousPageDate) = @_;

  my $longDay = UnixDate($date, "%A %e %B %Y");
  my $shortDay = UnixDate($date, "%Y-%m-%d (%a)");
  my $day = UnixDate($date, "%Y-%m-%d");
  my $year = UnixDate($date, "%Y");

  $fileDate = $date;
  $dayFilename = "$fileBase/$day.html";
  $dayFH = IO::File->new($dayFilename, '>');
  # Check the open before touching the handle; calling a method on undef
  # would otherwise hide the real error.
  die "unable to write $dayFilename: $!" unless defined $dayFH;
  $dayFH->autoflush(1);
  print $dayFH <<HEADER;
<!DOCTYPE html
        PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
<title>#$channel on $shortDay - IRC Logs</title>
<link rel="stylesheet" type="text/css" href="/irc-logs.css" />
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>
<center><h1>#$channel IRC Log for $longDay</h1></center>
<p align="right"><a href="$URL_BASE/$channel/$year.html">Back to #$channel Log Calendar</a></p>
HEADER

  print $dayFH "<p align=\"left\"><a href=\"$URL_BASE/$channel/",
    UnixDate($previousPageDate, "%Y-%m-%d.html"), "\">Back to ",
    UnixDate($previousPageDate, "%A %e %B %Y"), "</a></p><hr/>\n"
      if defined $previousPageDate;

  # Alternation of every nick seen so far (before this day is processed),
  # used to highlight "nick:" style addressing.  Nicks may legally contain
  # regex metacharacters such as [ ] | ^, so each one is quoted.
  my $nickRE = join('|', map { quotemeta($_) } keys %NICKS);
  $nickRE = "(?:$nickRE)" if $nickRE ne '';

  my( @lines, %topics);
  my $topicCount = 0;
  foreach my $line (@{$lines}) {
    # Strip the leading timestamp.  A line without one is skipped instead
    # of silently reusing a stale $1 from an earlier match.
    unless ($line =~ s/^\s*(\S+)\s+//) {
      warn "bad line $line\n";
      next;
    }
    my $stamp = $1;
    my $time = UnixDate($stamp, "%H:%M:%S");
    $time = '' unless defined $time;

    my ($origNick, $body, $style);
    if ($line =~ /^\s*\<(\S+)\>\s+(.*)\s*$/) {
      # Ordinary message.  Encoded like every other kind of line so that
      # markup typed in the channel is shown, not interpreted.
      ($origNick, $body, $style) = ($1, $2, 'post');
      $body = encode_entities($body);
    } elsif ($line =~ /^\s*\*\s*(\S+)\s+(.*)$/) {
      # "/me" action.
      ($origNick, $body, $style) = ($1, $2, 'me');
      $body = encode_entities($body);
    } elsif ($line =~ /^\s*(\S+)\s+((?:joined|left|quit)\s+.*)$/) {
      ($origNick, $body, $style) = ($1, $2, 'comego');
      $body .= ".";
      $body = encode_entities($body);
    } elsif ($line =~ /^\s*(\S+)\s+(is now known as)\s+(\S+)\s*$/) {
      ($origNick, $body, $style) = ($1, $2, 'comego');
      my $extraNick = $3;
      $body = encode_entities($body) . " <b class=\"address\">" .
        encode_entities($extraNick) . "</b>.";
    } elsif ($line =~ /^\s*(\S+)\s+(kicked\s+from\s+\S+\s+by\s+)(\S+)(\s+.*)$/){
      # NOTE(review): the kick reason ($4) is matched but never displayed;
      # confirm whether that is intended.
      ($origNick, $body, $style) = ($1, $2, 'comego');
      my $extraNick = $3;
      $body = encode_entities($body) . " <b class=\"address\">" .
        encode_entities($extraNick) . "</b>.";
    } elsif ($line =~ /^\s*(\S+)\s+(set\s+a\s+new\s+topic\s+on\s+\S+:\s*)"(.*)"\s*$/){
      my $topic;
      ($origNick, $body, $style, $topic) = ($1, $2, 'topic', $3);
      $topic = encode_entities($topic);
      $body = encode_entities($body) . $topic;
      $topics{$topic} = $topicCount++;
    } elsif ($line =~ /^\s*The\s+existing\s+topic\s+on\s+\S+:?\s*"?(.*?)"?\s*$/){
      # Non-greedy capture keeps the closing quote out of the topic, and
      # the topic is encoded so it matches the key stored by the
      # "set a new topic" branch for the same text.
      my $topic = encode_entities($1);
      $topics{$topic} = $topicCount++;
      next;
    } else {
      # Unrecognized line: keep it visible as a row of its own, in
      # sequence with the other rows, and move on.
      warn "bad line $stamp $line\n";
      push(@lines, "<tr><td valign=\"top\" class=\"time\">$time</td><td></td><td>"
             . encode_entities($line) . "</td></tr>\n");
      next;
    }

    my $nick = encode_entities($origNick);
    $nick .= "<strong>:</strong>" if $style eq 'post';

    $urlEncoder->find(\$body);

    # Every nick gets an entry (so it can be recognized when addressed),
    # but only spoken lines and actions are counted.
    $NICKS{$origNick}{$year}{$channel} = 0
      unless defined $NICKS{$origNick}{$year}{$channel};
    $NICKS{$origNick}{$year}{$channel}++ if $style =~ /^(?:post|me)$/;

    # Highlight leading "nick:" / "nick," addressing.  Skipped when no
    # nicks are known, because an empty alternation would match bare
    # punctuation at the start of the text.
    if ($nickRE ne '') {
      my $front = "";
      while ($body =~ s%^(\s*$nickRE[,:\-\s])%%i) {
        $front .= "<span class=\"address\">$1</span>";
      }
      $body = "$front$body";
    }

    $body =~ s%\*(\S+)\*%<strong>$1</strong>%g;

    push(@lines,  <<TEXT_LINE
<tr>
     <td valign="top" class="time">$time</td>
     <td valign="top" class="nick">$nick</td>
     <td class="$style">$body</td>
</tr>
TEXT_LINE
);
  }

  # Topics are shown above the log in the order they were first seen.
  foreach my $topic (sort {$topics{$a} <=> $topics{$b} } keys %topics) {
    $urlEncoder->find(\$topic);
    print $dayFH "<center><h3>", $topic, "</h3></center>\n";
    $LAST_TOPIC = $topic;
    $TOPICS{$topic}{$year}{$channel} = 0
      unless defined $TOPICS{$topic}{$year}{$channel};
    $TOPICS{$topic}{$year}{$channel}++;

  }
  # A day without any topic line repeats the last topic seen.
  print $dayFH "<center><h3>$LAST_TOPIC</h3></center>\n"
    if (keys %topics <= 0 and defined $LAST_TOPIC);

  print $dayFH "<table>\n";
  foreach my $line (@lines) { print $dayFH "$line\n";   }
}
}
298 ###############################################################################
299
300
# Main program: convert every "$LOG_DIR/*.log" channel log into a tree of
# HTML pages under $BASE -- one page per day, one calendar page per year,
# an index.html symlink per channel pointing at its latest year, and a
# top-level index.html listing all channels.

my $topIndexFile = "$BASE/index.html";
open(my $topLevelFH, '>', $topIndexFile)
  or die "unable to write $topIndexFile:$!";

print $topLevelFH "<html><head><title>IRC Logs</title></head>\n",
  "<body><center><h1>IRC Logs</h1></center>\n<ul>\n";

foreach my $file (glob("$LOG_DIR/*.log")) {
  my $BEGINNING_OF_TIME = ParseDate('1975-11-02');
  my $lastDate = $BEGINNING_OF_TIME;
  my $previousPageDate;

  my(%htmlMonth) = (object => undef, used => 0);
  my $yearFH;
  %TOPICS = %NICKS = ();
  # Do not let the previous channel's topic show up on this channel's pages.
  $LAST_TOPIC = undef;

  # Channel name = file name minus directory, ".log", an optional
  # "@localhost" suffix and the leading "#".
  my $channel = $file;
  $channel =~ s/^\Q$LOG_DIR\E\/(\S+)\.log$/$1/;
  $channel =~ s/^(\S+)(\@localhost)$/$1/;
  $channel =~ s/^\#//;

  my $chanBase = "$BASE/$channel";
  (mkdir($chanBase, 02750)
   or die "unable to create $chanBase with mode 2750:$!")
      unless (-d $chanBase);
  # Best effort only; the result is intentionally not checked.
  # NOTE(review): the numeric owner/group ids are site-specific.
  chown(1002, 33, $chanBase);

  open(my $ircLogFH, '<', $file)
    or die "unable to open $file for reading: $!";
  my @lines;
  while (my $line = <$ircLogFH>) {
    chomp $line;

    # Each line must start with a parseable timestamp token.  Anything else
    # is skipped so that a stale capture or an undefined date cannot
    # corrupt the day/month/year bookkeeping below.
    my ($date) = $line =~ /^(\S+)\s+/;
    unless (defined $date and defined UnixDate($date, '%Y-%m-%d')) {
      warn "skipping line without a usable timestamp in $file: $line\n";
      next;
    }

    # New day: write out the lines collected for the previous day, unless
    # there are none or only an "existing topic" notice.
    if (UnixDate($date, '%Y-%m-%d') ne UnixDate($lastDate, '%Y-%m-%d')) {
      if (@lines > 0 and not
          (@lines == 1 and $lines[0] =~ /existing\s+topic\s+on/i)) {
        FinishPrevDay($lastDate, $channel);
        DoDay($lastDate, \@lines, $channel, $chanBase, $previousPageDate);
        $htmlMonth{object}->item(int(UnixDate($lastDate, '%e')))
          ->wrap_content(a({href => "$URL_BASE/$channel/" .
                            UnixDate($lastDate, '%Y-%m-%d.html')}));
        $htmlMonth{used} = 1;
        $previousPageDate = $lastDate;
      }
      @lines = ();
    }

    # New month: emit the finished month's calendar (only if some day in it
    # had a page) into the year page, then start a fresh calendar.  This
    # must run before the year check so that December lands in the old
    # year's page.
    if (UnixDate($date, '%Y-%m') ne UnixDate($lastDate, '%Y-%m')) {
      if ($htmlMonth{used}) {
        my $str = $htmlMonth{object}->as_HTML;
        print $yearFH "$str<br/>\n";
      }
      my $cm = HTML::CalendarMonth->new( month =>  UnixDate($date, '%m'),
                                         year =>  UnixDate($date, '%Y'));
      $cm->item_daycol('Sun', 'Sat')->attr(bgcolor => 'cyan');
      my $f = HTML::Element->new('b');
      $cm->item($cm->year, $cm->month)->wrap_content($f);
      $htmlMonth{object} = $cm;
      $htmlMonth{used} = 0;
    }

    # New year: finish the old year page (if any) and open the next one.
    if (UnixDate($date, '%Y') ne UnixDate($lastDate, '%Y')) {
      if (defined $yearFH) {
        my $oldYearFile = "$chanBase/" . UnixDate($lastDate, '%Y') . ".html";
        FinishYear($yearFH, UnixDate($lastDate, '%Y'),
                   UnixDate($date, '%Y'), $channel);
        $yearFH->close()
          or warn "error closing $oldYearFile: $!\n";
        # List-form system() keeps the shell out of the picture.
        system('/bin/touch', '-t',
               UnixDate($lastDate, "%Y%m%d%H%M.%S"), $oldYearFile) == 0
          or warn "unable to set timestamp on $oldYearFile (status $?)\n";
      }
      my $newYearFile = "$chanBase/" . UnixDate($date, '%Y') . ".html";
      $yearFH = IO::File->new($newYearFile, '>')
        or die "unable to write $newYearFile: $!";
      YearHeader($yearFH, $channel, $date,
                 ($lastDate eq $BEGINNING_OF_TIME) ? undef : $lastDate);
    }
    $lastDate = $date;
    # Add the line to the list, unless it's just a bot joining the channel.
    push(@lines, $line)
      unless $channel eq $MAIN_CHANNEL and $line =~ /^\s*\S+\s+$BOT_RE\s+joined/;
  }
  close($ircLogFH)
    or warn "error closing $file: $!\n";

  # After the loop, finish up the last one at each level

  if (@lines > 0 and not
      (@lines == 1 and $lines[0] =~ /existing\s+topic\s+on/i)) {
    FinishPrevDay($lastDate, $channel);
    DoDay($lastDate, \@lines, $channel, $chanBase, $previousPageDate);
    FinishPrevDay(undef, $channel);
    $htmlMonth{object}->item(int(UnixDate($lastDate, '%e')))
      ->wrap_content(a({href => "$URL_BASE/$channel/" .
                        UnixDate($lastDate, '%Y-%m-%d.html')}));
    $htmlMonth{used} = 1;
  } else {
    FinishPrevDay(undef, $channel);
  }
  if (defined $yearFH) {
    my $lastYearFile = "$chanBase/" . UnixDate($lastDate, '%Y') . ".html";
    if ($htmlMonth{used}) {
      print $yearFH $htmlMonth{object}->as_HTML;
    }
    FinishYear($yearFH, UnixDate($lastDate, '%Y'), undef, $channel);
    $yearFH->close()
      or warn "error closing $lastYearFile: $!\n";
    system('/bin/touch', '-t',
           UnixDate($lastDate, "%Y%m%d%H%M.%S"), $lastYearFile) == 0
      or warn "unable to set timestamp on $lastYearFile (status $?)\n";

    # Point the channel's index.html at its most recent year page.  Only
    # done when a year page exists, so an empty log cannot leave a dangling
    # link behind.  The exit status is reported because $! says nothing
    # about why a child process failed.
    system('/bin/ln', '-fs', $lastYearFile, "$chanBase/index.html") == 0
      or warn "unable to link year file to index.html file (status $?)\n";
  }
  print $topLevelFH "<li><a href=\"$URL_BASE/$channel\">$channel</a></li>\n";
}
print $topLevelFH "</ul></body></html>\n";
close($topLevelFH)
  or die "error closing $topIndexFile: $!";
406
407
408 __END__
409 ###############################################################################
410 #
411 # Local variables:
412 # compile-command: "perl -I ../Modules -c irc-log-convert.plx"
413 # End:
Note: See TracBrowser for help on using the browser.

SFLC Main Page

[frdm] Support SFLC