| 1 |
#!/usr/bin/perl -w |
|---|
| 2 |
|
|---|
| 3 |
# Copyright 2005 Software Freedom Law Center, Inc. |
|---|
| 4 |
# |
|---|
| 5 |
# This program is free software: you may copy, modify, or redistribute it |
|---|
| 6 |
# and/or modify it under the terms of the GNU Affero General Public License |
|---|
| 7 |
# as published by the Free Software Foundation, either version 3 of the |
|---|
| 8 |
# License, or (at your option) any later version. |
|---|
| 9 |
# |
|---|
| 10 |
# This program is distributed in the hope that it will be useful, but |
|---|
| 11 |
# WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero |
|---|
| 13 |
# General Public License and/or GNU General Public License for more |
|---|
| 14 |
# details. |
|---|
| 15 |
# |
|---|
| 16 |
# You should have received a copy of the GNU Affero General Public License |
|---|
| 17 |
# and the GNU General Public License along with this program. If not, see |
|---|
| 18 |
# <http://www.gnu.org/licenses/>. |
|---|
| 19 |
|
|---|
| 20 |
|
|---|
| 21 |
# Copyright (c) 2005 Software Freedom Law Center |
|---|
| 22 |
# Author: Orion Montoya <orion@mdcclv.com> |
|---|
| 23 |
|
|---|
| 24 |
use Lingua::EN::Sentence qw( get_sentences add_acronyms ); |
|---|
| 25 |
|
|---|
| 26 |
use strict;
use warnings;

# Wrap every paragraph and sentence of the input in id-carrying tags.
#
# Input:  all files named on the command line, or STDIN when none are given.
# Output: on STDOUT, one <p id="autotag.pN"> element per paragraph, each
#         containing one <sent id="autotag.pN.sM"> element per sentence.
#         Paragraph numbers (N) start at 0; sentence numbers (M) restart at 1
#         inside every paragraph.
#
# NOTE(review): sentence text is interpolated into the markup verbatim, so a
# literal '&' or '<' in the input ends up unescaped in the output. Left as-is
# because the input may already carry inline markup -- confirm with callers
# before adding escaping.

# Reading <> in list context pulls in every line of every input file; joining
# the list yields '' (not undef) for empty input, so the split below never
# sees an uninitialized value.
my $wholedoc = join '', <>;

# A paragraph boundary is exactly two consecutive newlines.
# NOTE(review): a run of three or more newlines therefore produces a paragraph
# with a leading newline, or an empty paragraph; kept unchanged so existing
# paragraph ids do not shift.
my @paragraphs = split /\n\n/, $wholedoc;

my $para_index = 0;
for my $paragraph (@paragraphs) {
    print "<p id=\"autotag.p$para_index\">\n";

    # get_sentences is expected to hand back an array reference; the
    # fallback keeps the dereference safe under strict if it does not.
    my $sentences   = get_sentences($paragraph);
    my $sent_number = 1;
    for my $sentence (@{ $sentences || [] }) {
        print " <sent id=\"autotag.p$para_index.s$sent_number\">$sentence</sent>\n";
        $sent_number++;
    }

    print "</p>\n\n";
    $para_index++;
}
|---|