26/12/2008, 16:17
| | Fecha de Ingreso: octubre-2008 Ubicación: Madrid
Mensajes: 352
Antigüedad: 16 años, 5 meses Puntos: 5 | |
Llamar a una clase desde otra. Tengo el siguiente código con un main que, si lo ejecuto, me saca el texto de un PDF:
package poi;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.encryption.AccessPermission;
import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.pdfbox.util.PDFText2HTML;
import org.pdfbox.util.PDFTextStripper;
public class LeerPDF
public static final String DEFAULT_ENCODING =
//"ISO-8859-6"; //arabic
private static final String PASSWORD = "-password";
private static final String ENCODING = "-encoding";
private static final String CONSOLE = "-console";
private static final String START_PAGE = "-startPage";
private static final String END_PAGE = "-endPage";
private static final String SORT = "-sort";
private static final String HTML = "-html"; // jjb - added simple HTML output
* private constructor.
private LeerPDF()
//static class
* Infamous main method.
* @param args Command line arguments, should be one and a reference to a file.
* @throws Exception If there is an error parsing the document.
public static void main( String[] args ) throws Exception
boolean toConsole = false;
boolean toHTML = false;
boolean sort = false;
String password = "";
String encoding = DEFAULT_ENCODING;
String pdfFile = "C://prueba.pdf";
String textFile = null;
int startPage = 1;
int endPage = Integer.MAX_VALUE;
for( int i=0; i<args.length; i++ )
if( args[i].equals( PASSWORD ) )
if( i >= args.length )
password = args[i];
else if( args[i].equals( ENCODING ) )
if( i >= args.length )
encoding = args[i];
else if( args[i].equals( START_PAGE ) )
if( i >= args.length )
startPage = Integer.parseInt( args[i] );
else if( args[i].equals( HTML ) )
toHTML = true;
else if( args[i].equals( SORT ) )
sort = true;
else if( args[i].equals( END_PAGE ) )
if( i >= args.length )
endPage = Integer.parseInt( args[i] );
else if( args[i].equals( CONSOLE ) )
toConsole = true;
if( pdfFile == null )
pdfFile = args[i];
textFile = args[i];
if( pdfFile == null )
Writer output = null;
PDDocument document = null;
//basically try to load it from a url first and if the URL
//is not recognized then try to load it from the file system.
URL url = new URL( pdfFile );
document = PDDocument.load( url );
String fileName = url.getFile();
if( textFile == null && fileName.length() >4 )
File outputFile =
new File( fileName.substring( 0, fileName.length() -4 ) + ".txt" );
textFile = outputFile.getName();
catch( MalformedURLException e )
document = PDDocument.load( pdfFile );
if( textFile == null && pdfFile.length() >4 )
textFile = pdfFile.substring( 0, pdfFile.length() -4 ) + ".txt";
if( document.isEncrypted() )
StandardDecryptionMaterial sdm = new StandardDecryptionMaterial( password );
document.openProtection( sdm );
AccessPermission ap = document.getCurrentAccessPermission();
if( ! ap.canExtractContent() )
throw new IOException( "You do not have permission to extract text" );
if( toConsole )
output = new OutputStreamWriter( System.out );
if( encoding != null )
output = new OutputStreamWriter(
new FileOutputStream( textFile ), encoding );
//use default encoding
output = new OutputStreamWriter(
new FileOutputStream( textFile ) );
PDFTextStripper stripper = null;
stripper = new PDFText2HTML();
stripper = new PDFTextStripper();
stripper.setSortByPosition( sort );
stripper.setStartPage( startPage );
stripper.setEndPage( endPage );
stripper.writeText( document, output );
if( output != null )
if( document != null )
* This will print the usage requirements and exit.
private static void usage()
System.err.println( "Usage: java org.pdfbox.ExtractText [OPTIONS] <PDF file> [Text File]\n" +
" -password <password> Password to decrypt document\n" +
" -encoding <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n" +
" -console Send text to console instead of file\n" +
" -html Output in HTML format instead of raw text\n" +
" -sort Sort the text before writing\n" +
" -startPage <number> The first page to start extraction(1 based)\n" +
" -endPage <number> The last page to extract(inclusive)\n" +
" <PDF file> The PDF document to use\n" +
" [Text File] The file to write the text to\n"
System.exit( 1 );
La clase funciona perfectamente, pero necesito integrarla en mi código. O sea, en lugar de tener fijo el "String pdfFile = "C://prueba.pdf";", quiero mandarle yo ese valor desde otra clase, pero no sé cómo hacerlo, ya que coge los args de arriba en main...
Por favor, ¿me podéis indicar cómo debo hacer para mandarle a esta clase el string con la ruta del PDF del que quiero extraer el texto?
Muchas gracias. |