This patch updates htdig's PDF.cc to work around a bug in the acroread
shipped with Acrobat 4, which fails when given the -pairs option.  It also
changes the default pdf_parser setting and its documentation to match.  Note
that even without the -pairs option, acroread 4 is still prone to
segmentation violations when generating PostScript, so acroread 3 is a
better choice anyway.

Apply this patch AFTER applying the htdig-3.1.2-bugfixes.patch.

--- htdig-3.1.2/htdig/PDF.cc.orig	Tue Mar 23 17:17:33 1999
+++ htdig-3.1.2/htdig/PDF.cc	Fri Aug 13 16:05:16 1999
@@ -104,13 +104,22 @@ PDF::parse(Retriever &retriever, URL &ur
         acroread = "acroread";
 
     // Check for existance of acroread program! (if not, return)
-    //struct stat stat_buf;
-    // Check that it exists, and is a regular file. 
-    //if ((stat(acroread, &stat_buf) == -1) || !S_ISREG(stat_buf.st_mode))
-    //  {
-    //	printf("PDF::parse: cannot find acroread\n");
-    //	return;
-    //  }
+    struct stat stat_buf;
+    static int notfound = 0;
+    if (notfound)	// we only need to complain once
+	return;
+    String arg0 = acroread;
+    char *endarg = strchr(arg0.get(), ' ');
+    if (endarg)
+	*endarg = '\0';
+    // If first arg is a path, check that it exists, and is a regular file. 
+    if (strchr(arg0.get(), '/') &&
+	((stat(arg0.get(), &stat_buf) == -1) || !S_ISREG(stat_buf.st_mode)))
+    {
+	printf("PDF::parse: cannot find pdf parser %s\n", arg0.get());
+	notfound = 1;
+	return;
+    }
 
     // Write the pdf contents in a temp file to give it to acroread
 
@@ -140,9 +149,19 @@ PDF::parse(Retriever &retriever, URL &ur
 
 
     // Use acroread as a filter to convert to PostScript.
-    // Now generalized to allow xpdf as a parser (works with most recent xpdf)
+    // Now generalized to allow xpdf as a parser, or other compatible parsers
+    // (It was claimed it works with most recent xpdf, but it doesn't!)
     //    acroread << " -toPostScript " << pdfName << " " << tmpdir << " 2>&1";
-    acroread << " " << pdfName << " " << psName << " 2>&1";
+    String dest = psName;
+    if (strstr(acroread.get(), "acroread"))
+    {
+	// special-case tests only for acroread (what else you gonna use?)
+	if (!strstr(acroread.get(), "-toPostScript"))
+	    acroread << " -toPostScript ";	// add missing option
+	if (!strstr(acroread.get(), "-pairs"))	// don't use -pairs with 4.0
+	    dest = tmpdir;
+    }
+    acroread << " " << pdfName << " " << dest << " 2>&1";
 
     if (system(acroread))
     {
--- htdig-3.1.2/htcommon/defaults.cc.orig	Thu Mar 25 11:49:40 1999
+++ htdig-3.1.2/htcommon/defaults.cc	Fri Aug 13 16:05:16 1999
@@ -21,7 +21,7 @@ ConfigDefaults	defaults[] =
     {"database_dir",			DATABASE_DIR},
     {"bin_dir",				BIN_DIR},
     {"image_url_prefix",		IMAGE_URL_PREFIX},
-    {"pdf_parser",                      PDF_PARSER " -toPostScript -pairs"},
+    {"pdf_parser",                      PDF_PARSER " -toPostScript"},
     {"version",				VERSION},
 
     //
--- htdig-3.1.2/htdoc/attrs.html.orig	Fri Aug  6 14:00:28 1999
+++ htdig-3.1.2/htdoc/attrs.html	Tue Aug 17 10:55:45 1999
@@ -4271,7 +4271,7 @@
 			<em>default:</em>
 		  </dt>
 		  <dd>
-			acroread -toPostScript -pairs
+			acroread -toPostScript
 		  </dd>
 		  <dt>
 			<em>description:</em>
@@ -4283,14 +4283,33 @@
 		      <em>infile outfile</em>,<br>
 		      where <em>infile</em> is a file to parse and
 		      <em>outfile</em> is the PostScript output of the
-		      parser. The program is supposed to convert to a
+		      parser. In the case where acroread is the parser, and
+		      the -pairs option is not given, the second parameter
+		      will be the output directory rather than the output
+		      file name. The program is supposed to convert to a
 		      variant of PostScript, which is then parsed
-		      internally. Currently, Adobe's <a
+		      internally. Currently, only Adobe's <a
 		      href="http://www.adobe.com/prodindex/acrobat/readstep.html">
-		      acroread</a> program and the pdftops program
-		      that is part of the <a
+		      acroread</a> program has been tested as a pdf_parser.
+		      There is a bug in Acrobat 4's acroread command, which
+		      causes it to fail when -pairs is used, hence the special
+		      case above.<br>
+		       The pdftops program that is part of the <a
 		      href="http://www.foolabs.com/xpdf/">xpdf</a>
-		      0.80 package have been tested as pdf_parsers.
+		      package is not suitable as a pdf_parser,
+		      because its variant of PostScript is slightly
+		      different.  However, an alternative is to
+		      use xpdf's pdftotext program as a component
+		      of an <a href="#external_parsers">external
+		      parser</a> with the xpdf 0.90 package installed
+		      on your system, as described in FAQ question <a
+		      href="FAQ.html#q4.9">4.9</a>.<br>
+		       In either case, to successfully index PDF files,
+		      be sure to set the <a
+		      href="#max_doc_size">max_doc_size</a> attribute
+		      to a value larger than the size of your largest
+		      PDF file. PDF documents cannot be parsed if they
+		      are truncated.
 			<p>
 			  The default value of this attribute is determined at
 			  compile time, to include the path to the acroread
@@ -4301,7 +4320,7 @@
 			<em>example:</em>
 		  </dt>
 		  <dd>
-			pdf_parser: /usr/local/bin/acroread -toPostScript -pairs
+			pdf_parser: /usr/local/Acrobat3/bin/acroread -toPostScript -pairs
 		  </dd>
 		</dl>
 	  </dd>

