public class PDFParser extends AbstractParser
Modifier and Type | Field and Description |
---|---|
protected static org.slf4j.Logger | log |
Inherited fields: content, encoding, filename, locale
Constructor and Description |
---|
PDFParser() |
Modifier and Type | Method and Description |
---|---|
String | getAuthor() |
String | getSourceDate() |
String | getTags() |
String | getTitle() |
void | internalParse(InputStream input): Invoked by the parse method. |
protected void | parseDocument(org.apache.pdfbox.pdmodel.PDDocument pdfDocument): Extract text and metadata from the main document. |
void | parseForm(org.apache.pdfbox.pdmodel.PDDocument pdfDocument): Extract the text from the form fields. |
Inherited methods: getContent, getEncoding, getFilename, getLocale, getVersion, parse, parse, setEncoding, setFilename, setLocale
public String getAuthor()
getAuthor in interface Parser
getAuthor in class AbstractParser
public String getSourceDate()
getSourceDate in interface Parser
getSourceDate in class AbstractParser
public String getTags()
getTags in interface Parser
getTags in class AbstractParser
public String getTitle()
getTitle in interface Parser
getTitle in class AbstractParser
public void internalParse(InputStream input)
Invoked by the parse method (description from AbstractParser).
internalParse in class AbstractParser
protected void parseDocument(org.apache.pdfbox.pdmodel.PDDocument pdfDocument)
public void parseForm(org.apache.pdfbox.pdmodel.PDDocument pdfDocument) throws IOException
Throws: IOException
Copyright © 2008-2014 Logical Objects. All Rights Reserved.