This is a slightly more advanced example of using the
Apache PDFBox library. It shows how to replace strings of text in a PDF with another string. This only works for simple text occurrences; in many cases, text is stored in a more complicated way and cannot be substituted easily. See
https://pdfbox.apache.org/2.0/migration.html#why-was-the-replacetext-example-removed for details on that.
If you're new to PDFBox, start with the PdfBox example rather than this one.
import java.awt.Color;
import java.io.*;
import java.util.*;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
/**
 * Creates a one-page PDF containing the text "Hello World, Hello World" and
 * then rewrites the page's content stream, substituting literal strings that
 * appear as operands of the {@code Tj} and {@code TJ} text-showing operators.
 *
 * <p>Only text that is stored as a plain string operand of those two
 * operators is considered. A search string that is split across several
 * operands (or several array elements of a {@code TJ} operand) is not found.
 */
public class SimpleReplace {

    /** Name of the operator whose single operand is the string to show. */
    private static final String OP_SHOW_TEXT = "Tj";

    /** Name of the operator whose single operand is an array that may contain strings. */
    private static final String OP_SHOW_TEXT_ARRAY = "TJ";

    /**
     * Builds the sample document, performs two replacements and saves the
     * result as {@code SimpleReplace.pdf} in the working directory.
     *
     * @param args ignored
     * @throws Exception if the document cannot be created, modified or saved
     */
    public static void main(String[] args) throws Exception {
        String outputFileName = "SimpleReplace.pdf";
        String encoding = "ISO-8859-1";

        // try-with-resources: the document is closed even if a later step throws.
        try (PDDocument document = new PDDocument()) {
            PDPage page1 = new PDPage(PDRectangle.A4);
            PDRectangle rect = page1.getMediaBox();
            document.addPage(page1);

            PDFont fontPlain = PDType1Font.HELVETICA;

            // The content stream must be closed before the page is re-parsed below.
            try (PDPageContentStream cos = new PDPageContentStream(document, page1)) {
                cos.beginText();
                cos.setFont(fontPlain, 12);
                cos.newLineAtOffset(100, rect.getHeight() - 50);
                cos.showText("Hello World, Hello World");
                cos.endText();
            }

            // Replace every "Hello", but only the first "World" of each string.
            searchReplace("Hello", "Hi", encoding, true, document);
            searchReplace("World", "Earth", encoding, false, document);

            document.save(outputFileName);
        }
    }

    /**
     * Replaces occurrences of {@code search} with {@code replace} in the string
     * operands of all {@code Tj} and {@code TJ} operators on every page of
     * {@code doc}, then installs the rewritten token list as the page's new
     * content stream.
     *
     * <p>Both {@code search} and {@code replace} are treated literally; no
     * regular-expression or {@code $}-group syntax is interpreted.
     *
     * @param search     literal text to look for
     * @param replace    literal text to insert instead
     * @param encoding   charset name used to turn a modified string back into bytes
     * @param replaceAll {@code true} to replace every occurrence within each
     *                   string operand, {@code false} to replace only the first
     *                   occurrence within each string operand
     * @param doc        document whose pages are rewritten in place
     * @throws IOException if a content stream cannot be parsed or written, or
     *                     if {@code encoding} is not a supported charset name
     */
    private static void searchReplace(String search, String replace,
            String encoding, boolean replaceAll, PDDocument doc) throws IOException {
        for (PDPage page : doc.getDocumentCatalog().getPages()) {
            PDFStreamParser parser = new PDFStreamParser(page);
            parser.parse();
            List<Object> tokens = parser.getTokens();

            // Start at 1: an operator at index 0 has no preceding operand to inspect.
            for (int j = 1; j < tokens.size(); j++) {
                Object token = tokens.get(j);
                if (!(token instanceof Operator)) {
                    continue;
                }
                String operatorName = ((Operator) token).getName();
                Object operand = tokens.get(j - 1);

                // The instanceof checks skip operands of an unexpected type
                // instead of failing with a ClassCastException.
                if (OP_SHOW_TEXT.equals(operatorName) && operand instanceof COSString) {
                    replaceInString((COSString) operand, search, replace, encoding, replaceAll);
                } else if (OP_SHOW_TEXT_ARRAY.equals(operatorName) && operand instanceof COSArray) {
                    COSArray array = (COSArray) operand;
                    for (int k = 0; k < array.size(); k++) {
                        Object element = array.getObject(k);
                        // Non-string array elements are left untouched.
                        if (element instanceof COSString) {
                            replaceInString((COSString) element, search, replace, encoding, replaceAll);
                        }
                    }
                }
            }

            // Serialize the (possibly modified) tokens into a fresh stream for this page.
            PDStream updatedStream = new PDStream(doc);
            try (OutputStream out = updatedStream.createOutputStream()) {
                ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                tokenWriter.writeTokens(tokens);
            }
            page.setContents(updatedStream);
        }
    }

    /**
     * Applies the substitution to one string token and writes the result back
     * using {@code encoding}. The token is only modified when the text actually
     * changed, so strings without a match keep their original bytes.
     */
    private static void replaceInString(COSString target, String search, String replace,
            String encoding, boolean replaceAll) throws IOException {
        String original = target.getString();
        String updated = substitute(original, search, replace, replaceAll);
        if (!updated.equals(original)) {
            target.setValue(updated.getBytes(encoding));
        }
    }

    /**
     * Returns {@code text} with either all occurrences or only the first
     * occurrence of the literal {@code search} replaced by the literal
     * {@code replace}.
     */
    private static String substitute(String text, String search, String replace,
            boolean replaceAll) {
        if (replaceAll) {
            return text.replace(search, replace);
        }
        int at = text.indexOf(search);
        if (at < 0) {
            return text;
        }
        return text.substring(0, at) + replace + text.substring(at + search.length());
    }
}
CodeSnippets