Software Freedom Law Center

root/trunk/apps/pdfdiff/pdfdiff.in

Revision 91, 5.4 kB (checked in by bkuhn, 8 months ago)

r76@hughes: bkuhn | 2008-04-27 20:49:20 -0400

  • Wrote basics for PDF::Extract::Text
  • Fixed various things in pdfdiff.in
  • Fixed version number of PDF::OCR required, and where version is
    obtained for PDF::Extract::Text
Line 
1 #!@PERL@ -w
2 #  pdfdiff, Version @VERSION@                                      -*- Perl -*-
3 #     This program is a front-end script that extracts the text from two
4 #     PDF files and shows the differences between them
5 #
6 # Copyright (C) 2008  Bradley M. Kuhn.
7 # Copyright (C) 2008  Software Freedom Law Center, Inc.
8 #
9 #    This software gives you freedom; it is licensed to you under version 3
10 #    of the GNU General Public License, as published by the Free Software
11 #    Foundation.
12 #
13 #    This software is distributed WITHOUT ANY WARRANTY, without even the
14 #    implied warranties of MERCHANTABILITY and FITNESS FOR A PARTICULAR
15 #    PURPOSE.  See the GNU General Public License for further
16 #    details.
17 #
18 #    You should have received a copy of the GNU General Public License,
19 #    version 3 with this software in the file GPLv3 in this directory.  If
20 #    not, see <http://www.gnu.org/licenses/>.
21
22 use File::Basename qw(&basename &dirname);
23 use Cwd;
24
25 require @PERL_VERSION_NEEDED@;
26
27 use strict;
28 use warnings;
29
30 use Getopt::Long qw(:config auto_help bundling);
31 use Pod::Usage;
32 use PDF::Extract::Text;
33 use File::Temp ();
34
35 my $DIFF_CMD  = '@DIFF@';    # default diff program; @...@ is a placeholder, presumably filled in at build time -- confirm
36 my $PDIFF_CMD = '@PDIFF@';   # program run when --pdiff is given
37 my $MELD_CMD = '@MELD@';     # program run when --meld is given
38
39 =head1 SYNOPSIS
40
41 pdfdiff [options] <old_file.pdf> <new_file.pdf>
42
43 General Options:
44
45   --save-intermediates=TEMPLATE    Saves the text files generated.
46
47 Output Options:
48
49  By default, output will be regular DIFF style.
50
51   --options=OPTIONS    Add OPTIONS to diff/merge command.
52
53  Three output styles are supported:
54
55  DIFF
56    Uses the system's 'diff' program, @DIFF@.
57      --diff=DIFF_CMD           Run an alternative DIFF_CMD instead of @DIFF@.
58
59  PDIFF
60    Uses the @PDIFF@ program from a2ps to generate a Postscript markup.
61
62      --pdiff=PSFILE_OUTPUT      Use "pdiff" to generate output to PSFILE_OUTPUT
63
64  MELD
65    Uses the GUI program, @MELD@ to merge the two versions.
66
67      --meld                    Use @MELD@
68      --options=OPTIONS         Add OPTIONS to the meld command
69
70 =head1 DESCRIPTION
71
72 pdfdiff attempts to do anything it can to extract the text from two PDFs,
73 show you the differences, and try to help you merge the versions if you
74 want to.
75
76 =head1 OPTIONS
77
78 =over 8
79
80 =item B<--save-intermediates=TEMPLATE>
81
82 Save the text files generated before the diff for the old and new PDF.
83 The output is put in TEMPLATE-old.txt and TEMPLATE-new.txt, respectively.
84 TEMPLATE can include a path name.
85
86 =item B<--diff=DIFF_CMD>
87
88 Use DIFF_CMD instead of @DIFF@ when running the diff.
89
90
91 =item B<--pdiff>
92
93 Use @PDIFF@ to generate PostScript output of the differences.  Takes a
94 string argument, PSFILE_OUTPUT, naming the PostScript output file.
95
96
97 =item B<--meld>
98
99 Use @MELD@.
100
101 =item B<--options>
102
103 Given a string argument, OPTIONS, those options will be passed to the
104 chosen diff command or merging process.
105
106 =item B<--verbose>
107
108 Be noisy about what is being done.  Note that this is very verbose.  It's a firehose or nothing.
109
110 =item B<--help>
111
112 This help page.
113
114 =back
115
116 =cut
117
118 my($saveTemplate, $pdiffOutputFile, $extraDiffOptions,$diffCmd, $useMeld,
119    $help, $verbose);
120 # Parse the command line; a GetOptions failure prints usage via pod2usage(2).
121 my $parsedOk = GetOptions("save-intermediates=s" => \$saveTemplate,
122                           "pdiff=s"              => \$pdiffOutputFile,
123                           "options=s"            => \$extraDiffOptions,
124                           "diff=s"               => \$diffCmd,
125                           "meld"                 => \$useMeld,
126                           "help"                 => \$help,
127                           "verbose"              => \$verbose);
128 pod2usage(2) unless $parsedOk;
129 if ($help) { pod2usage(-exitstatus => 0, -verbose => 2); }
130 unless (@ARGV == 2) { pod2usage("$0: takes exactly two files.\n"); }
131 # Each of the two remaining arguments must pass the -r (readable) test.
132 for my $file (@ARGV) {
133   -r $file or pod2usage("$0: cannot read $file: $!");
134 }
135
136 my($oldFile, $newFile) = @ARGV;
137 require IO::File;     # loaded explicitly: nothing this script imports promises it
138 require File::Spec;   # likewise, for File::Spec->tmpdir below
139 if ($verbose) {
140   $PDF::OCR::Thorough::DEBUG = 1;   # NOTE(review): presumably read during extraction -- confirm
141 }
142 # Each PDF's text goes to a named file (--save-intermediates) or a temp file.
143 my($oldTextFile, $newTextFile, $oldFH, $newFH);
144 if (defined $saveTemplate and length $saveTemplate) {
145   $oldTextFile = "${saveTemplate}-old.txt";
146   $newTextFile = "${saveTemplate}-new.txt";
147   # Mode is a separate argument so the file name is never parsed for a mode.
148   $oldFH = IO::File->new($oldTextFile, 'w') or
149     pod2usage("$0: cannot open $oldTextFile for writing: $!");
150   $newFH = IO::File->new($newTextFile, 'w') or
151     pod2usage("$0: cannot open $newTextFile for writing: $!");
152 } else {
153   my %tempArgs = (UNLINK => 1, SUFFIX => '.txt', DIR => File::Spec->tmpdir);
154   $oldFH = File::Temp->new(TEMPLATE => 'oldXXXXXXXX', %tempArgs);
155   $newFH = File::Temp->new(TEMPLATE => 'newXXXXXXXXX', %tempArgs);
156   # A File::Temp object stringifies to the name of its file.
157   ($oldTextFile, $newTextFile) = ("$oldFH", "$newFH");
158 }
159
160 # Any failure below is fatal: the comparison reads these files back, and a
161 # truncated file would show up as bogus differences.
162 my $oldExtract = PDF::Extract::Text->new(pdfFile => $oldFile)
163   or die("cannot do PDF::Extract::Text on $oldFile");
164 my $newExtract = PDF::Extract::Text->new(pdfFile => $newFile)
165   or die("cannot do PDF::Extract::Text on $newFile");
166 my $oldText = $oldExtract->getText();
167 my $newText = $newExtract->getText();
168 print {$oldFH} $oldText or die("cannot write to $oldTextFile: $!");
169 print {$newFH} $newText or die("cannot write to $newTextFile: $!");
170
171 $oldFH->close() or die("cannot close $oldTextFile: $!");
172 $newFH->close() or die("cannot close $newTextFile: $!");
173
174 # Build one argument list per requested output style, then run each in turn.
175 require Text::ParseWords;   # core module; splits --options the way a shell would
176
177 # --options may carry several switches ("-u -w"); each must reach the command
178 # as its own argument, so split the string instead of passing it whole.
179 my @extraArgs = $extraDiffOptions
180   ? Text::ParseWords::shellwords($extraDiffOptions) : ();
181 my @fileArgs = ($oldTextFile, $newTextFile);
182
183 my(@commands);
184 push(@commands, [ $PDIFF_CMD, "-o", $pdiffOutputFile, @extraArgs, @fileArgs ])
185   if $pdiffOutputFile;
186 push(@commands, [ $MELD_CMD, @extraArgs, @fileArgs ]) if $useMeld;
187 # Plain diff is the default when no other style was asked for.
188 push(@commands, [ ($diffCmd ? $diffCmd : $DIFF_CMD), @extraArgs, @fileArgs ])
189   unless ($pdiffOutputFile or $useMeld);
190
191 # Run each command from its argument list and remember the worst outcome, so
192 # callers can tell a failed or non-zero run from a clean one.
193 my $exitStatus = 0;
194 foreach my $cmd (@commands) {
195   my $rc = system(@{$cmd});
196   my $status;
197   if ($rc == -1) {
198     # system() returns -1 when the program could not be started at all.
199     warn("$0: cannot run $cmd->[0]: $!\n");
200     $status = 2;
201   } elsif ($rc & 127) {
202     warn("$0: $cmd->[0] died with signal " . ($rc & 127) . "\n");
203     $status = 2;
204   } else {
205     $status = $rc >> 8;   # the command's own exit code
206   }
207   $exitStatus = $status if $status > $exitStatus;
208 }
209 # Release the handles; File::Temp objects created with UNLINK => 1 remove
210 # their files when destroyed.
211 $oldFH = $newFH = undef;
212
213 exit $exitStatus;
Note: See TracBrowser for help on using the browser.

SFLC Main Page

[frdm] Support SFLC