diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml b/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml index 91f3461bf15e..88beafba26f6 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/pom.xml @@ -222,11 +222,14 @@ org.wso2.carbon.governance org.wso2.carbon.governance.custom.lifecycles.checklist - org.apache.pdfbox pdfbox + + org.apache.pdfbox + pdfbox-io + org.wso2.carbon.governance org.wso2.carbon.governance.lcm diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java index 4fe423e8fe61..f4161bfc273e 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/DocumentIndexer.java @@ -23,7 +23,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.cos.COSDocument; @@ -182,9 +182,9 @@ private String fetchDocumentContent(Registry registry, Resource documentResource inputStream = contentResource.getContentStream(); switch (extension) { case APIConstants.PDF_EXTENSION: - PDFParser pdfParser = new PDFParser(new RandomAccessBufferedFileInputStream(inputStream)); + PDFParser pdfParser = new PDFParser(new RandomAccessReadBuffer(inputStream)); pdfParser.parse(); - COSDocument cosDocument = pdfParser.getDocument(); + COSDocument cosDocument = pdfParser.parse().getDocument(); PDFTextStripper stripper = new PDFTextStripper(); contentString = stripper.getText(new PDDocument(cosDocument)); break; diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java index c65290cac015..803714535b9e 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/main/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexer.java @@ -9,7 +9,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.pdfbox.cos.COSDocument; @@ -30,7 +30,7 @@ public IndexDocument getIndexedDocument(File2Index fileData) throws SolrExceptio try { PDFParser parser = getPdfParser(fileData); parser.parse(); - cosDoc = parser.getDocument(); + cosDoc = parser.parse().getDocument(); PDFTextStripper stripper = getPdfTextStripper(); String docText = stripper.getText(new PDDocument(cosDoc)); @@ -70,7 +70,7 @@ protected PDFTextStripper getPdfTextStripper() throws IOException { } protected PDFParser getPdfParser(File2Index fileData) throws IOException { - return new PDFParser(new RandomAccessBufferedFileInputStream(new ByteArrayInputStream(fileData.data))); + return new PDFParser(new RandomAccessReadBuffer(new ByteArrayInputStream(fileData.data))); } } diff --git a/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java b/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java index 64fc21774e6a..2d0f200f7868 100644 --- a/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java +++ b/components/apimgt/org.wso2.carbon.apimgt.impl/src/test/java/org/wso2/carbon/apimgt/impl/indexing/indexer/PDFIndexerTest.java @@ -45,10 +45,13 @@ public void testShouldReturnIndexedDocumentWhenParameterCorrect() throws IOExcep String mediaType = "application/pdf+test"; final String MEDIA_TYPE = "mediaType"; PDFParser parser = Mockito.mock(PDFParser.class); + PDDocument pdDocument = Mockito.mock(PDDocument.class); COSDocument cosDoc = Mockito.mock(COSDocument.class); PDFTextStripper pdfTextStripper = Mockito.mock(PDFTextStripper.class); Mockito.doThrow(IOException.class).when(cosDoc).close(); - Mockito.when(parser.getDocument()).thenReturn(new COSDocument()).thenReturn(cosDoc); + Mockito.when(parser.parse()).thenReturn(new PDDocument()); + + Mockito.when(pdDocument.getDocument()).thenReturn(new COSDocument()).thenReturn(cosDoc); Mockito.when(pdfTextStripper.getText(new PDDocument())).thenReturn(""); PDFIndexer pdfIndexer = new PDFIndexerWrapper(parser, pdfTextStripper); diff --git a/pom.xml b/pom.xml index 120bea9f3a0e..b0e01d8865fd 100644 --- a/pom.xml +++ b/pom.xml @@ -1383,6 +1383,12 @@ ${pdfbox.version} + + org.apache.pdfbox + pdfbox-io + ${pdfbox.version} + + org.apache.pdfbox fontbox @@ -2140,7 +2146,7 @@ 1.3.12 - 2.0.25 + 3.0.1 2.3.5 2.3.5 1.0.16.wso2v1