Foros del Web - Ver Mensaje Individual

Manolait · #5 (**permalink**) 14/05/2013, 18:57

No consigo nada. Intentaré explicar mi proyecto: es un buscador en Lucene. Ahora mismo estoy creando un directorio y una carpeta donde indexar los documentos. Este es mi código en Java:

Código HTML:

import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.document.*;
import java.io.*;
import java.util.ArrayList;

/**
 * Builds a Lucene index in {@code c:/Temp/indice} with one document per readable
 * {@code .txt} file found directly inside {@code c:/Temp/documentos}.
 *
 * <p>Every document currently carries two hard-coded test fields ({@code bolde} and
 * {@code title2}); the contents of the files themselves are not read yet.
 */
public class CreaIndice {
    /**
     * Creates the index, adds one document per matching file and prints the resulting
     * document count.
     *
     * @param args unused
     * @throws Exception if the documents folder cannot be listed or Lucene fails to
     *     open, write or close the index
     */
    public static void main(String[] args) throws Exception {

        File directorioGuardarIndice = new File("c:/Temp/indice"); // folder that holds the index
        File Documentos = new File("c:/Temp/documentos"); // folder with the documents to index

        // listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message before the index is created.
        File[] archivos = Documentos.listFiles();
        if (archivos == null) {
            throw new FileNotFoundException(
                    "Cannot list documents directory: " + Documentos.getAbsolutePath());
        }

        Directory RecorreDirectorio = FSDirectory.open(directorioGuardarIndice);
        try {
            Analyzer analizador = new SpanishAnalyzer(Version.LUCENE_31); // analyzer

            IndexWriterConfig configuracionIndice = new IndexWriterConfig(
                    Version.LUCENE_31, analizador);

            IndexWriter Indice = new IndexWriter(RecorreDirectorio, configuracionIndice);
            try {
                // Placeholders for the per-field contents that the commented-out code
                // below is meant to fill in once reading the files works.
                String bold;
                String text;
                String textIMG;
                String title;
                String underline;

                String id;

                for (int i = 0; i < archivos.length; i++) {

                    File f = archivos[i]; // current entry of the documents folder

                    if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                            && (f.getName().endsWith(".txt"))) {
                        System.out.println("Indexing " + f.getCanonicalPath());
                        Document doc = new Document();

                        //bold = leerFichero(File[0]);
                        /*text = leerFichero(File[1]);
                        textIMG = leerFichero(File[2]);
                        title = leerFichero(File[3]);
                        underline = leerFichero(File[4]);
                        */

                        // Test: this way every term is indexed correctly, but the value of
                        // "bolde" is typed in by hand instead of being read from the file.
                        String bolde = "This is the text to be indexed.";
                        doc.add(new Field("bolde", bolde, Field.Store.YES,
                                Field.Index.ANALYZED));

                        String title2 = "prueba con el titulo acné  sábado dolores";
                        doc.add(new Field("title2", title2, Field.Store.YES,
                                Field.Index.ANALYZED));

                        /* This variant does not index properly; the problem seems to be the reading.
                        // NOTE(review): every field below is given f.getName(), so only the
                        // file name would be indexed, never the file contents.
                        // File name field
                        //Field camponombre = new Field("rutaArchivo", new InputStreamReader(new FileInputStream(f), "UTF-8"));
                        //doc.add(camponombre);
                        Field camponombres = new Field("rutaArchivo", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(camponombres);
                        // id field
                        Field campoid = new Field("id", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campoid);
                        // File contents field
                        Field campocontenido = new Field("contenido", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campocontenido);
                        // bold field
                        Field campobold = new Field("bold", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campobold);
                        // textimg field
                        Field campotextimg = new Field("textimg", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campotextimg);
                        // title field
                        Field campotitle = new Field("title", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campotitle);
                        // underlineWords field
                        Field campounderlineWords = new Field("underlineWords", f.getName(),Field.Store.YES, Field.Index.ANALYZED);
                        doc.add(campounderlineWords);
                        */

                        Indice.addDocument(doc);
                    }
                }
                Indice.optimize();
                // Ask for the count while the writer is still open; a closed writer
                // must not be queried.
                System.out.println("el numero de documentos indexados es "
                        + Indice.numDocs());
            } finally {
                // Always release the writer (and its write lock), even if indexing failed.
                Indice.close();
            }
        } finally {
            RecorreDirectorio.close();
        }
    }
}

Pero no indexa bien: si le paso el string escrito a mano sí funciona, pero yo quiero lo que he comentado antes, que sepa que el campo bold es el primer término a leer, para luego pasarle ese valor ya leído a Field e indexarlo.

Este es mi código en C#, que no sé adaptar a Java:

Código HTML:

 /// <summary>
 /// Click handler that rebuilds the Lucene index: for each numbered subfolder of the
 /// plain-text directory it reads five .txt files and indexes them as the fields
 /// bold, text, textIMG, title and underline, plus an id equal to the folder number.
 /// Progress and the elapsed time are appended to the LBLucene list box.
 /// </summary>
 private void BIndexar_Click(object sender, EventArgs e)
        {
            String directoryIndex = @"C:\Users\Laura\Documents\Proyectointel\LuceIndex";

            ISet<string> stopWords = new HashSet<string>(SPANISH_STOP_WORDS);

            Analyzer analyzer = new SnowballAnalyzer(Lucene.Net.Util.Version.LUCENE_30, "Spanish", stopWords);

            // The 'true' argument is passed in the position of the writer's create flag.
            IndexWriter writer = new IndexWriter(FSDirectory.Open(directoryIndex), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                String directoryFiles = @"C:\Users\Laura\Documents\Proyectointel\textoplano";

                stopWatch = new Stopwatch();
                stopWatch.Start();

                // Only the number of subfolders is used; folders are addressed by index
                // below, so they are expected to be named 0, 1, 2, ...
                string[] carpetas = System.IO.Directory.GetDirectories(directoryFiles);

                for (int i = 0; i < carpetas.Length; i++)
                {
                    string[] files = System.IO.Directory.GetFiles(directoryFiles + "/" + i + "/", "*.txt");

                    // GetFiles does not guarantee any order, but the fields below are
                    // assigned by position, so sort to make the mapping deterministic.
                    // NOTE(review): assumes the alphabetical order of the file names is
                    // bold, text, textIMG, title, underline - confirm against the data.
                    System.Array.Sort(files, System.StringComparer.OrdinalIgnoreCase);

                    string bold = leerFichero(files[0]);
                    string text = leerFichero(files[1]);
                    string textIMG = leerFichero(files[2]);
                    string title = leerFichero(files[3]);
                    string underline = leerFichero(files[4]);

                    string id = i.ToString();

                    Document doc = new Document();
                    doc.Add(new Field("bold", bold, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("textIMG", textIMG, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("underline", underline, Field.Store.YES, Field.Index.ANALYZED));
                    // The id is stored verbatim (not analyzed) so it can be matched exactly.
                    doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                    writer.AddDocument(doc);
                    LBLucene.Items.Add("Indexado archivo: " + i);
                }

                writer.Optimize();
            }
            finally
            {
                // Always release the writer, even if reading or indexing a file failed,
                // so the index is not left locked.
                writer.Dispose();
            }

            stopWatch.Stop();
            LBLucene.Items.Add("Indexado en: " + getTime());
            // Scroll the list box to the last entry.
            LBLucene.TopIndex = LBLucene.Items.Count - 1;
        }