Changeset 70
- Timestamp: 04/27/08 20:56:37 (2 months ago)
- Files:
- trunk/apps/pdfdiff/ChangeLog (modified) (1 diff)
- trunk/apps/pdfdiff/pdfdiff.in (modified) (4 diffs)
- trunk/apps/pdfdiff/Perl/lib/PDF/Extract/Text.pm (modified) (2 diffs)
- trunk/apps/pdfdiff/Perl/Makefile.PL (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/apps/pdfdiff/ChangeLog
 r67  r70
        1 + 2008-04-27 Bradley M. Kuhn <bkuhn@ebb.org>
        2 +
        3 + * Perl/lib/PDF/Extract/Text.pm (new): Wrote function.
        4 + (PDF::OCR::Thorough::_pdftotext): Wrote override of Thorough
        5 + function.
        6 + (getText): Wrote function.
        7 +
trunk/apps/pdfdiff/pdfdiff.in
 r66  r70
  29   29
  30   30   use Getopt::Long qw(:config auto_help bundling);
  31      - use PDF::OCR::Thorough;
       31 + use Pod::Usage;
       32 + use PDF::Extract::Text;
  32   33
  33   34   =head1 SYNOPSIS
   …    …
 111  112   =cut
 112  113
      114 + my($saveTemplate, $pdiffOutputFile, $extraDiffOptions,$diffCmd, $useMeld,
      115 + $help, $verbose);
      116 +
 113  117   GetOptions("save-intermediates=s" => \$saveTemplate,
 114  118   "pdiff=s" => \$pdiffOutputFile,
   …    …
 116  120   "diff=s" => \$diffCmd,
 117  121   "meld" => \$useMeld,
 118      - "help" => \$help
      122 + "help" => \$help,
 119  123   "verbose" => \$verbose
 120  124   ) or pod2usage(2);
   …    …
 133  137   if ($saveTemplate) {
 134  138   open(OLD_TEXT, ">${saveTemplate}-old.txt") or
 135      - pod2usage("$0: cannot open ${saveTemplate}-old.txt for writing: $!";
      139 + pod2usage("$0: cannot open ${saveTemplate}-old.txt for writing: $!");
 136  140   open(NEW_TEXT, ">${saveTemplate}-new.txt") or
 137      - pod2usage("$0: cannot open ${saveTemplate}-new.txt for writing: $!";
      141 + pod2usage("$0: cannot open ${saveTemplate}-new.txt for writing: $!");
 138  142   }
 139      - my $oldText = new PDF::OCR::Thorough($oldFile)
      143 + my $oldExtract = new PDF::Extract::Text(pdfFile => $oldFile)
 140  144   or die("cannot do PDF::OCR::Thorough on $oldFile");
 141      - my $newText = new PDF::OCR::Thorough($newFile)
      145 + my $newExtract = new PDF::Extract::Text(pdfFile => $newFile)
 142  146   or die("cannot do PDF::OCR::Thorough on $newFile");
      147 +
      148 + my $oldText = $oldExtract->getText();
      149 + my $newText = $newExtract->getText();
 143  150
 144  151   print "OLD: $oldText\n";
trunk/apps/pdfdiff/Perl/lib/PDF/Extract/Text.pm
 r68  r70
  20   20   use warnings;
  21   21
       22 + BEGIN {
       23 + use PDF::OCR::Thorough;
       24 + sub PDF::OCR::Thorough::_pdftotext {
       25 + my $self = shift;
       26 + $self->{pdftotextbin} ||= PDF::OCR::Thorough::which('pdftotext') or die("missing pdftotext?");
       27 + return ($self->{pdftotextbin}, '-layout', '-nopgbrk');
       28 + }
       29 + }
       30 +
       31 + package PDF::Extract::Text;
       32 +
  22   33   require Exporter;
  23   34   use AutoLoader qw(AUTOLOAD);
       35 + use Carp qw(croak);
       36 + use PDF::OCR::Thorough;
  24   37
  25   38   =head1 NAME
   …    …
  58   71   =head1 PUBLIC METHODS
  59   72
       73 + =head2 new PDF::Extract::Text
       74 +
       75 + Creates a new PDF::Extract::Text object
       76 +
       77 + new PDF::Extract::Text(pdfFile => $pdfFileName)
       78 +
  60   79   =cut
  61   80
       81 + sub new {
       82 + my $this = shift;
       83 + my $class = ref($this) || $this;
       84 + my $self = {};
       85 + bless $self, $class;
       86 + my(%args) = @_;
       87 +
       88 + $self->{pdfFile} = $args{pdfFile};
       89 +
       90 + croak("usage: new PDF::Extract::Text(pdfFile => \$pdfFileName)")
       91 + unless defined $self->{pdfFile};
       92 + croak("cannot open $self->{pdfFile} for reading: $!")
       93 + unless -r $self->{pdfFile};
       94 +
       95 + $self->{ocrObj} = new PDF::OCR::Thorough($self->{pdfFile});
       96 + return $self;
       97 + }
       98 +
       99 + =head2 getText
      100 +
      101 + Returns the text, formatted as best as is possible.
      102 +
      103 + =cut
      104 +
      105 + sub getText {
      106 + my $self = shift;
      107 +
      108 + return $self->{ocrObj}->get_text;
      109 + }
      110 +
      111 + 1;
      112 +
      113 + __END__
trunk/apps/pdfdiff/Perl/Makefile.PL
 r69  r70
  27   27   PREREQ_FATAL => 1,
  28   28   PREREQ_PM => {
  29      - 'PDF::OCR' => '1.7'
       29 + 'PDF::OCR' => '1.07'
  30   30   },
  31   31   MAKEFILE => "Makefile.perl-generated",
  32   32   NAME => 'PDF::Extract::Text',
  33      - VERSION_FROM => "lib/PDF/Extract/Text/Version.pm.in",
       33 + VERSION_FROM => "lib/PDF/Extract/Text/Version.pm",
  34   34
  35   35   AUTHOR => 'Bradley M. Kuhn <bkuhn@ebb.org>',