http://www.technicalpage.net/search/label/SQL

Read PDF, Validate Text in the PDF.

Read PDF and validate TEXT in the PDF.

The code below reads a PDF document.
You can also use it to validate text present in the PDF.
The code below uses the pdfbox-app-2.0.15 and fontbox-2.0.15 jars.


1. When the PDF is a file on your computer/system.

package readPDFTest;

import java.io.File;
import java.io.FileInputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

public class ReadPDF {

       public static void main(String[] args) throws Exception {
             
              File fle = new File("C://....//Test.pdf");
              FileInputStream fis = new FileInputStream(fle);       

              PDDocument doc = PDDocument.load(fis);
              PDFTextStripper pdfTxtStrp = new PDFTextStripper ();
              String act_pdfText = pdfTxtStrp.getText(doc);
              act_pdfText_noSpace = act_pdfText.replaceAll("\\s+","");

       //     System.out.println("pdfText is :\n" +act_pdfText); // This will print                    all the text from the pdf file. Remove comment "//" from the front if                 you want this statement to be executed with the code.
             String textExpected = "Type text or sentence here";
             textExpected_noSpace = textExpected.replaceAll("\\s+","");
             

              if (act_pdfText_noSpace.contains(textExpected_noSpace)) {
                     System.out.println("PASS - the text is present.");
              } else {
                     System.out.println("FAIL - the text is not present.");
              }
             
       }

}


2. When the PDF is an online file, i.e., on the internet.

The code below uses the following jars:
pdfbox-1.8.1.jar
fontbox-1.8.1.jar

import java.io.BufferedInputStream;
import java.net.URL;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

public class ReadPDF {

       public static void main(String[] args) throws Exception {
             
              URL pdfURL = new URL("https://...online location of pdf file");

BufferedInputStream bufferIS = new BufferedInputStream(pdfURL.openStream());
PDFParser pdfPSHR = new PDFParser(bufferIS);
pdfPSHR.parse();

COSDocument  getDoc = pdfPSHR.getDocument();
PDFTextStripper pdfTXTstrp = new PDFTextStripper();
//pdfTXTstrp.setStartPage(1); //uncomment this if you want to test from starting page number to ending page number. This statement specifies starting page number.
//pdfTXTstrp.setEndPage(5); //uncomment this if you want to test from starting page number to ending page number. This statement specifies ending page number.

PDDocument doc = new PDDocument(getDoc);
String actualTEXT = pdfTXTstrp.getText(doc);
String actualTEXT_NoSpace = actualTEXT.replaceAll("\\s+","");


String expectedTEXT = "Type text or sentence here";
String expectedTEXT_NoSpace = expectedTEXT.replaceAll("\\s+","");

             
              if (actualTEXT_NoSpace.contains(expectedTEXT_NoSpace)) {
                     System.out.println("PASS");
              } else {
                     System.out.println("FAIL");
              }
             
       }

}




No comments:

Post a Comment