Software Freedom Law Center

root/trunk/antimatter/tim/Bot-BasicBot-Pluggable/examples/chump.cgi

Revision 53, 11.3 kB (checked in by bkuhn, 10 months ago)
  • Added SFLC's internally developed tim bot released under AGPLv3
Line 
1 #!/usr/bin/perl -w
2
3 # this won't work out the box. I'll fix it in a later release. But it's what
4 # we use to look at the output from the chumping module (Blog). See
5 # http://2lmc.org/blog
6
7 use strict;
8 use Template;
9 use CGI;
10 use Time::Local;
11 use Calendar::Simple;
12 use Digest::MD5 qw(md5_hex);
13 use LWP::Simple;
14 use Image::Size;
15 use DBI;
16
17 my $vars = {};
18
19 my $db = DBI->connect("DBI:mysql:database=jerakeen", "2lmc", "2lmc");
20
21 my %title;
22 if (open(TITLES, "titles.txt")) {
23     while (<TITLES>) {
24         chomp;
25         next unless $_;
26         my ($url, $title) = split(/\s+/, $_, 2);
27         $title{$url} = $title if $title;
28     }
29     close(TITLES);
30 }
31
32
33 my $timestamp = CGI::param("timestamp");
34 my $blog_id = CGI::param("blog_id");
35 my $upper = CGI::param("upper");
36 my $lower = CGI::param("lower");
37 my $search = CGI::param("search");
38
39 my $day = CGI::param("day");
40 my $month = CGI::param("month");
41 my $year = CGI::param("year");
42
43 my $title;
44
45 if ($day and $month and $year) {
46     $lower = timegm(0, 0, 0, $day, $month-1, $year-1900);
47     $upper = timegm(59, 59, 23, $day, $month-1, $year-1900);
48     $title = sprintf("%04d/%02d/%02d", $year, $month, $day);
49 } elsif ($month and $year) {
50     $lower = timegm(0, 0, 0, 1, $month-1, $year-1900);
51     $upper = timegm(0, 0, 0, 1, $month, $year-1900) if $month < 12;
52     $upper = timegm(0, 0, 0, 1, 0, $year-1899) if $month >= 12;
53     $title = sprintf("%04d/%02d", $year, $month);
54 } elsif ($year) {
55     $lower = timegm(0, 0, 0, 1, 0, $year-1900);
56     $upper = timegm(59, 59, 23, 1, 0, $year-1899);
57     $title = sprintf("%04d", $year);
58 }
59
60 $upper = 1500000000 unless defined($upper); # TODO - fix before Fri Jul 14 02:40:00 2017
61 $lower = 0 unless defined($lower);
62
63 my @calendar = calendar($month, $year, 1);
64 my $dates_ref = get_link_days($month, $year);
65 @calendar = merge(\@calendar, $dates_ref);
66
67 my @lt = localtime;
68 $vars->{calendar} = \@calendar;
69 $vars->{month} = $month || $lt[4]+1;
70 $vars->{year} = $year || $lt[5]+1900;
71 $vars->{today} = $day || $lt[3];
72 my @monthnames = (qw(dummy Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec));
73 $vars->{monthnames} = \@monthnames;
74 my @entries;
75
76 my $entry;
77
78 my $query;
79 if ($search) {
80     my $sql = "SELECT DISTINCT mindblog.* FROM mindblog,mindblog_comments ";
81     $sql .= "WHERE mindblog.blog_id=mindblog_comments.blog_id AND (";
82     my @terms = split(/[\s,]+/, $search);
83     $sql .= join(" AND ", map { "(mindblog.data LIKE '%$_%' OR mindblog_comments.data LIKE '%$_%')" } @terms);
84     $sql .= ") ORDER BY mindblog.timestamp DESC LIMIT 20";
85
86     print STDERR $sql;
87    
88     $query = $db->prepare($sql);
89     $query->execute();
90     $title = "search results for ".join(", ", @terms);
91
92 } elsif ($blog_id) {
93     $query = $db->prepare("SELECT * FROM mindblog WHERE blog_id=? ORDER BY timestamp DESC");
94     $query->execute($blog_id);
95
96 } elsif ($timestamp) {
97     $query = $db->prepare("SELECT * FROM mindblog WHERE timestamp=? ORDER BY timestamp DESC");
98     $query->execute($timestamp);
99    
100 } elsif ($upper and $lower) {
101     $query = $db->prepare("SELECT * FROM mindblog WHERE timestamp>? AND timestamp<? ORDER BY timestamp DESC");
102     $query->execute($lower, $upper);
103
104 } else {
105     $query = $db->prepare("SELECT * FROM mindblog ORDER BY timestamp DESC LIMIT 20");
106     $query->execute();
107     $title = "recent entries";
108 }
109
110 my $comment_query = $db->prepare("SELECT * FROM mindblog_comments WHERE blog_id=? ORDER BY timestamp");
111
112 while (my $row = $query->fetchrow_hashref) {
113
114     $row->{data} =~ s/#\s*$//;
115
116     if ($row->{data} =~ /^http:\S+$/) {
117         my $title = get_title($row->{data});
118         $row->{data} = "[$row->{data}|$title]" if $title;
119     }
120     $row->{message} = blog_filter($row->{data});
121
122     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = localtime($row->{timestamp});
123     $row->{date} = sprintf("%04d/%02d/%02d %02d:%02d", $year+1900, $mon+1, $mday, $hour, $min);
124
125     $comment_query->execute($row->{blog_id});
126     my $comments = [];
127     while (my $comment = $comment_query->fetchrow_hashref) {
128         $comment->{message} = blog_filter($comment->{data});
129         push(@$comments, $comment);
130     }
131
132     $row->{comments} = $comments;
133
134     push(@{$vars->{entries}}, $row);
135
136 }
137
138 if ($vars->{entries} and length(@{$vars->{entries}}) == 1) {
139     $title ||= $vars->{entries}->[0]->{message};
140     $title =~ s/<[^>]+>//g;
141 }
142
143 $title ||= "ramblings";
144 $vars->{title} = "2lmc blog - $title";
145 $vars->{sub_title} = $title;
146
147 $vars->{url} = CGI::url();
148
149 my @desc = (
150             "on the internet, nobody knows you're not the Gartner Group",
151             "Lasciate ogni speranza voi ch'entrate",
152             "We laugh at Devil Bunny",
153             "as despised by muttley",
154            );
155
156 $vars->{description} = $desc[3];
157
158 my $tt = Template->new(POST_FOLD=>1, PRE_FOLD=>1);
159
160 my $template = "chump.tem";
161
162 if (defined(CGI::param("rss"))) {
163     $template = "rss.tem";
164     print CGI::header("text/xml");
165     for (@{$vars->{entries}}) {
166         $_->{title} = $_->{message};
167         $_->{title} =~ s/<[^>]+>//g;
168     }
169 } else {
170     print CGI::header();
171 }
172
173 $tt->process($template, $vars) || print $tt->error();
174
175
176 sub blog_filter {
177     my $text = shift;
178
179     return '' if (!defined $text); # catch empty 'bc' mistakes
180
181     $text =~ s/&/&amp;/g;
182     $text =~ s/</&lt;/g;
183     $text =~ s/>/&gt;/g;
184     $text =~ s/((?:^|[\b\s]))(http:\/\/[^>\s\"]+)/$1<a href="$2">$2<\/a>/gi;
185     $text =~ s/\+\[([^\]]+)\]/chump_image($1)/eig;
186     $text =~ s/\[([^\]]+)\]/chump($1)/eig;
187
188     $text =~ s/\*([\w']+)\*/<b>$1<\/b>/ig;
189     $text =~ s/\s\/(\w+)\/\s/<i>$1<\/i>/ig;
190
191     return $text;
192 }
193
194 sub chump {
195     my $text = shift;
196     my ($one, $two) = split(/\|/, $text);
197     $one =~ s/^\s+//;
198     $one =~ s/\s+$//;
199     $two =~ s/^\s+// if $two;
200     $two =~ s/\s+$// if $two;
201
202
203     if ($two) {
204         # Ok, so we have [<one>|<two>]. We want to Do The Right Thing, and
205         # not require people to remember which way round to put the link and
206         # title. This is pretty easy to get right - 90% of the time, the link
207         # is really obvious. These tests will catch 99% of the cases.
208
209         # catch 'real' urls - http://, ftp://, etc.
210         if ($one =~ /^\w+:\/\//) {
211             return "<a href=\"$one\">$two</a>";
212         } elsif ($two =~ /^\w+:\/\//) {
213             return "<a href=\"$two\">$one</a>";
214
215         # catch just numbers, guess if it's a blog_id or a timestamp
216         # TODO if we ever have >10^8 blog entries, this will break.
217         # Hopefuly, time() will be larger by then, and I can adjust this
218         # number.
219         } elsif ($one =~ /^\d{8,}$/) {
220             return "<a href=\"".CGI::url()."?timestamp=$one\">$two</a>";
221         } elsif ($one =~ /^\d+$/) {
222             return "<a href=\"".CGI::url()."?blog_id=$one\">$two</a>";
223         } elsif ($two =~ /^\d{8,}$/) {
224             return "<a href=\"".CGI::url()."?timestamp=$two\">$one</a>";
225         } elsif ($two =~ /^\d+$/) {
226             return "<a href=\"".CGI::url()."?blog_id=$two\">$one</a>";
227
228         # Finally, if we've matched neither end so far, try to pick up a
229         # simpler form of uri, things like mailto:me@address.com.
230         } elsif ($one =~ /^\w+:/) {
231             return "<a href=\"$one\">$two</a>";
232         } elsif ($two =~ /^\w+:/) {
233             return "<a href=\"$two\">$one</a>";
234
235         # ok, you got me. I'm stumped. Print /something/, at least.
236         } else {
237             return "[$one|$two]";
238         }
239                
240     } else {
241         if ($one =~ /^\w+:\/\//) {
242             return "<a href=\"$one\">$one</a>";
243         } elsif ($one =~ /^\d{8,}$/) {
244             return "<a href=\"".CGI::url()."?timestamp=$one\">$one</a>";
245         } elsif ($one =~ /^\d+$/) {
246             return "<a href=\"".CGI::url()."?blog_id=$one\">$one</a>";
247         } else {
248             my $query = $db->prepare("SELECT * FROM infobot WHERE object=?");
249             $query->execute("blog_shortcut $one");
250             my $row = $query->fetchrow_hashref();
251             return "[$one]" unless $row;
252             return $row->{description} unless ($row->{description} =~ /\[(.*)\]/);
253             return chump($1);
254         }
255     }
256 }
257
258 sub chump_image {
259     my $text = shift;
260
261     unless ($text =~ /(?:gif|jpe?g|png)$/i) {
262         return "<br><iframe src=\"$text\" width=500 height=300></iframe><font size=-1>[<a href=\"$text\">$text</a>]</font><br>";
263     }
264     my $link = $text;
265     my $hash = md5_hex($text);
266     my $file = "cache/$hash";
267     unless (-e "$file.jpg") {
268         $text =~ s/&amp;/&/ig;
269         $text =~ s/%2E/./ig;
270         $text =~ s/%3A/:/ig;
271         $text =~ s/%2F/\//ig;
272         print STDERR "Getting $text to $hash\n";
273         mirror($text, $file);
274         print STDERR "Converting to jpg\n";
275         print STDERR `convert \"$file\" \"$file.jpg\"`;
276         my ($width, $height) = imgsize("$file.jpg");
277         if (($width > 300) or ($height > 150)) {
278             print STDERR "Resizing\n";
279             `convert -resize 300x150 \"$file.jpg\" \"$file.jpg\"`;
280            
281         } else {
282             undef $link;
283         }
284     }
285     my $ret = "<br>";
286     $ret .= "<a href=\"$link\">" if $link;
287     $ret .= "<img src=\"http://2lmc.org/blog/$file.jpg\" alt=\"$text\" title=\"$text\">";
288     $ret .= "</a>" if $link;
289     $ret .= "<br>";
290     return $ret;
291 }
292
293 sub get_title {
294     my $url = shift;
295     return $title{$url} if $title{$url};
296
297     print STDERR "title for $url not cached\n";
298     my $title;
299
300     my $data = get($url);
301
302     unless ($data) {
303         print STDERR "  Can't get page\n";
304
305     } elsif ($data =~ /<title>([^<]+)<\/title>/i) {
306         $title = $1;
307         $title =~ s/\|//g;
308         $title =~ s/\n//g;
309         $title =~ s/^\s+//;
310         $title =~ s/\s+$//;
311         print STDERR "  Found title $title\n";
312
313     } else {
314         print STDERR "  Can't find title\n";
315     }
316
317     $title ||= $url;
318     $title{$url} = $title;
319     save_titles();
320     return $url;
321 }
322
323 sub save_titles {
324     if (open(TITLES, ">titles.txt")) {
325         for (keys(%title)) {
326             print TITLES "$_ $title{$_}\n";
327         }
328         close(TITLES);
329     } else {
330         print STDERR "Can't save titles: $!\n";
331     }
332    
333 }
334
335 sub get_link_days {
336   my ($month, $year) = @_;
337
338   my ($start, $end)  = get_epochs($month, $year);
339   my %dates;
340
341   my $sql = "SELECT DISTINCT(FLOOR(timestamp/86400)) FROM mindblog WHERE timestamp > ? AND timestamp < ?";
342
343   my $query = $db->prepare($sql);
344   $query->execute($start, $end);
345
346   my $comment_query = $db->prepare("SELECT * FROM mindblog_comments WHERE blog_id=? ORDER BY timestamp");
347  
348   while (my $row = $query->fetchrow_arrayref) {
349 #    print "Got ", Dumper($row);
350
351     my ($day, $link) = get_url($row->[0]);
352
353     $dates{$day} = $link;
354   } 
355
356   return \%dates;
357 }
358
359 sub merge {
360   my ($cal, $dates) = @_;
361  
362   foreach my $week (@{ $cal }) {
363     foreach my $day (@{ $week }) {
364       next if (!defined $day);
365       if (exists $dates->{$day}) {
366         $day = { $day => $dates->{$day} };
367       } else {
368         $day = { $day => undef };
369       }
370     }
371   }
372  
373   return @{ $cal };
374 }
375
376 sub get_epochs {
377   my ($mon, $year) = @_;
378  
379   my @lt = localtime;
380   $mon  = $lt[4]+1    if (!$mon);
381   $year = $lt[5]+1900 if (!$year);
382
383   my $start_time = timelocal(0,0,0,1,$mon-1,$year-1900);
384   my $end_time;
385   if ($mon < 12) {
386     $end_time = timelocal(0,0,0,1,$mon,$year-1900);
387   } else {
388     $end_time = timelocal(0,0,0,1,0,$year-1900+1);
389   }
390
391   return ($start_time, $end_time);
392 }
393
394 sub get_url {
395   my $date  = shift;
396
397   my $epoch = $date*86400;
398   my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = localtime($epoch);
399
400   my $link = sprintf("?day=%d;month=%d;year=%d", $mday, $mon+1, $year+1900);
401   return ($mday, $link);
402 }
Note: See TracBrowser for help on using the browser.

SFLC Main Page

[frdm] Support SFLC