Software Freedom Law Center

Changeset 70

Show
Ignore:
Timestamp:
04/27/08 20:56:37 (2 months ago)
Author:
bkuhn
Message:

r76@hughes: bkuhn | 2008-04-27 20:49:20 -0400

  • Wrote basics for PDF::Extract::Text
  • Fixed various things in pdfdiff.in
  • Fixed the required version number of PDF::OCR, and the file from which
    the version of PDF::Extract::Text is obtained
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/apps/pdfdiff/ChangeLog

    r67 r70  
     12008-04-27  Bradley M. Kuhn  <bkuhn@ebb.org> 
     2 
     3        * Perl/lib/PDF/Extract/Text.pm (new): Wrote function. 
     4        (PDF::OCR::Thorough::_pdftotext): Wrote override of Thorough 
     5        function. 
     6        (getText): Wrote function. 
     7 
  • trunk/apps/pdfdiff/pdfdiff.in

    r66 r70  
    2929 
    3030use Getopt::Long qw(:config auto_help bundling); 
    31 use PDF::OCR::Thorough; 
     31use Pod::Usage; 
     32use PDF::Extract::Text; 
    3233 
    3334=head1 SYNOPSIS 
     
    111112=cut 
    112113 
     114my($saveTemplate, $pdiffOutputFile, $extraDiffOptions,$diffCmd, $useMeld, 
     115   $help, $verbose); 
     116 
    113117GetOptions("save-intermediates=s" => \$saveTemplate, 
    114118           "pdiff=s"              => \$pdiffOutputFile, 
     
    116120           "diff=s"               => \$diffCmd, 
    117121           "meld"                 => \$useMeld, 
    118            "help"                 => \$help 
     122           "help"                 => \$help, 
    119123           "verbose"              => \$verbose 
    120124) or pod2usage(2); 
     
    133137if ($saveTemplate) { 
    134138  open(OLD_TEXT, ">${saveTemplate}-old.txt") or 
    135     pod2usage("$0: cannot open ${saveTemplate}-old.txt for writing: $!"
     139    pod2usage("$0: cannot open ${saveTemplate}-old.txt for writing: $!")
    136140  open(NEW_TEXT, ">${saveTemplate}-new.txt") or 
    137     pod2usage("$0: cannot open ${saveTemplate}-new.txt for writing: $!"
     141    pod2usage("$0: cannot open ${saveTemplate}-new.txt for writing: $!")
    138142} 
    139 my $oldText = new PDF::OCR::Thorough($oldFile) 
     143my $oldExtract = new PDF::Extract::Text(pdfFile => $oldFile) 
    140144  or die("cannot do PDF::OCR::Thorough on $oldFile"); 
    141 my $newText = new PDF::OCR::Thorough($newFile) 
     145my $newExtract = new PDF::Extract::Text(pdfFile => $newFile) 
    142146  or die("cannot do PDF::OCR::Thorough on $newFile"); 
     147 
     148my $oldText = $oldExtract->getText(); 
     149my $newText = $newExtract->getText(); 
    143150 
    144151print "OLD: $oldText\n"; 
  • trunk/apps/pdfdiff/Perl/lib/PDF/Extract/Text.pm

    r68 r70  
    2020use warnings; 
    2121 
     22BEGIN { 
     23  use PDF::OCR::Thorough; 
     24  sub PDF::OCR::Thorough::_pdftotext { 
     25    my $self = shift; 
     26    $self->{pdftotextbin} ||= PDF::OCR::Thorough::which('pdftotext') or die("missing pdftotext?"); 
     27    return ($self->{pdftotextbin}, '-layout', '-nopgbrk'); 
     28  } 
     29} 
     30 
     31package PDF::Extract::Text; 
     32 
    2233require Exporter; 
    2334use AutoLoader qw(AUTOLOAD); 
     35use Carp qw(croak); 
     36use PDF::OCR::Thorough; 
    2437 
    2538=head1 NAME 
     
    5871=head1 PUBLIC METHODS 
    5972 
     73=head2 new PDF::Extract::Text 
     74 
     75Creates a new PDF::Extract::Text object 
     76 
     77 new PDF::Extract::Text(pdfFile => $pdfFileName) 
     78 
    6079=cut 
    6180 
     81sub new { 
     82    my $this = shift; 
     83    my $class = ref($this) || $this; 
     84    my $self = {}; 
     85    bless $self, $class; 
     86    my(%args) = @_; 
     87 
     88    $self->{pdfFile} = $args{pdfFile}; 
     89 
     90    croak("usage: new PDF::Extract::Text(pdfFile => \$pdfFileName)") 
     91      unless defined $self->{pdfFile}; 
     92    croak("cannot open $self->{pdfFile} for reading: $!") 
     93      unless -r $self->{pdfFile}; 
     94 
     95    $self->{ocrObj} = new PDF::OCR::Thorough($self->{pdfFile}); 
     96    return $self; 
     97  } 
     98 
     99=head2 getText 
     100 
     101Returns the text, formatted as best as is possible. 
     102 
     103=cut 
     104 
     105sub getText { 
     106  my $self = shift; 
     107 
     108  return $self->{ocrObj}->get_text; 
     109} 
     110 
     1111; 
     112 
     113__END__ 
  • trunk/apps/pdfdiff/Perl/Makefile.PL

    r69 r70  
    2727    PREREQ_FATAL => 1, 
    2828   PREREQ_PM => { 
    29       'PDF::OCR'                                         => '1.7' 
     29      'PDF::OCR'                                         => '1.07' 
    3030   }, 
    3131    MAKEFILE => "Makefile.perl-generated", 
    3232    NAME           => 'PDF::Extract::Text', 
    33     VERSION_FROM   => "lib/PDF/Extract/Text/Version.pm.in", 
     33    VERSION_FROM   => "lib/PDF/Extract/Text/Version.pm", 
    3434 
    3535    AUTHOR         => 'Bradley M. Kuhn <bkuhn@ebb.org>', 

SFLC Main Page

[frdm] Support SFLC