Saturday 26 January 2013

Document Indexing


SolrjPopulator.java

import java.io.IOException;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

import util.Constants;

public class SolrjPopulator {
   public static String[] cat = new String[10];
   static{
       cat[0] = "Electronics" ;
       cat[1] = "Books" ;
       cat[2] = "Memory" ;
       cat[3] = "Mobile Accessories" ;
       cat[4] = "Mobile" ;
       cat[5] = "Computer" ;
       cat[6] = "Computer Accessories" ;
       cat[7] = "Tablets" ;
       cat[8] = "Tables Accessories" ;
       cat[9] = "Home Furnishing" ;
   }
   public static void main(String[] args) throws IOException,
           SolrServerException {
       SolrServer server = new HttpSolrServer(Constants.SERVER_NAME);
       for (int i = 0; i < 10000; ++i) {
           SolrInputDocument doc = new SolrInputDocument();
           doc.addField("cat", cat[i%10]);
           doc.addField("id", cat[i%10] + "-" + i);
           doc.addField("name", "Name for " + cat[i%10] + " :: "+ i);
           server.add(doc);
           if (i % 100 == 0)
               server.commit(); // periodically flush
       }
       server.commit();
   }
}

SolrJSearcher.java

import java.net.MalformedURLException;
import java.util.List;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;

import util.Constants;

public class SolrJSearcher {

   public static void main(String[] args) throws MalformedURLException,
           SolrServerException {
       SolrServer solr = new HttpSolrServer(Constants.SERVER_NAME);

       ModifiableSolrParams params = new ModifiableSolrParams();
       // params.set("q", "cat:book"); // query string
       params.set("q", "*:*"); // query string
//        params.set("defType", "edismax");
//        params.set("fl", "score,*"); // filter
       // params.set("debugQuery","on");
       params.set("start", "0");
       params.set("rows", "20000");

       QueryResponse response = solr.query(params);
       SolrDocumentList results = response.getResults();
       List<String> keysList = Constants.createKeyList(results);
       for (int i = 0; i < results.size(); ++i) {
           SolrDocument doc = results.get(i);
           
           for(String key : keysList)
           {
               System.out.println(key + ": " + doc.get(key));
           }
           System.out.println("-----------------------------------");
       }
   }
}

Constants.java

package util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;


public class Constants {
   public static final String SERVER_NAME = "http://localhost:8983/solr";
   
   public static HttpSolrServer getSolrServer(){
       return new HttpSolrServer(Constants.SERVER_NAME);
   }
   
   public static List<String> createKeyList(SolrDocumentList results) {
       List<String> keysList = new ArrayList<String>();
       Map<String, Object> fieldValueMap = new HashMap<String,Object>();
       for (int i = 0; i < results.size(); ++i) {
           SolrDocument doc = results.get(i);
           fieldValueMap = doc.getFieldValueMap();
           for(String key : fieldValueMap.keySet())
           {
               if(!keysList.contains(key))
                   keysList.add(key);
            }
       }
       return keysList;
   }
}



Bean Indexing


Item.java

package bean;
import java.util.List;

import org.apache.solr.client.solrj.beans.Field;

public class Item {
  @Field("id")
  public String id;

  @Field("cat")
  public String[] categories;

  @Field
  public List<String> features;
  
  @Field
  public String name;
  
  @Field
  public String manu;

  @Field
  public Float price ;

  @Field
  public int popularity;

  @Field
  public boolean inStock;

  @Field
  public Float weight;

  @Field
  public String includes;

  @Field
  public String payloads;

  @Field
  public String manu_id_s;

  @Field
  public String sku ;

  public String toString(){
      String cat = "" ;
      if(categories!=null)
      {
          cat = "[Categories: " ;
          for(String category : categories)
          {
              cat += category;
          }
          cat = cat + "]" ;
      }
      
      String tempFeatures = "" ;
      if(features!=null)
      {
          tempFeatures = "[Features: " ;
          for(String feat : features)
          {
              tempFeatures += feat;
          }
          tempFeatures += "]" ;
      }

      return this.id + ", " +
              this.name + ", " +
              this.manu + ", " +
              cat + ", " +
              tempFeatures + ", " +
              this.includes + ", " +
              this.payloads + ", " +
              this.popularity + ", " +
              this.manu_id_s + ", " +
              this.sku + ", " +
              this.price + ", " +
              this.weight + ", " +
              this.inStock
              ;
  }
}

BeanPopulator.java

package bean;

import java.io.IOException;
import java.util.Date;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;

import util.Constants;

public class BeanPopulator {
   public static void main(String[] args) {
       SolrServer server = Constants.getSolrServer();
       Item item = new Item();
       item.id = "one " +  new Date();
       item.categories = new String[] { "aaa", "bbb", "ccc" };
       item.name  = "name " +  new Date();
       item.manu = "manu " +  new Date();
       item.price=  10.2f;
       item.popularity= 0 ;
       item.inStock= true;
       item.weight= 1.2f;
       item.includes= "one " +  new Date();
       item.payloads= "one " +  new Date();
       item.manu_id_s= "one " +  new Date();
       item.sku= "" +  new Date();
       
       
       try {
           UpdateResponse response = server.addBean(item);
           System.out.println(response.getResponse());
       } catch (IOException e) {
           e.printStackTrace();
       } catch (SolrServerException e) {
           e.printStackTrace();
       }
       /*
        * List<Item> beans ; //add Item objects to the list
        * server.addBeans(beans);
        */
   }
}


BeanReader.java

package bean;

import java.io.IOException;
import java.util.List;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocumentList;

import util.Constants;

public class BeanReader {
   
   public static void main(String[] args) {
       SolrServer server = Constants.getSolrServer();
       SolrQuery query = new SolrQuery();
       query.setQuery( "cat:*" );
//        query.addSortField( "price", SolrQuery.ORDER.asc );
       try {
           QueryResponse rsp = server.query( query );
           SolrDocumentList docs = rsp.getResults();
           List<Item> beans = rsp.getBeans(Item.class);
           for(Item item : beans)
           {
               System.out.println(item.toString());
           }
           
//            deleteAllBeans(beans,server);
           
       } catch (SolrServerException e) {
           e.printStackTrace();
       }
   }
   
   public static void deleteAllBeans(List<Item> beans,SolrServer server){
       try {
           for(Item item : beans)
           {
               System.out.println(item.toString());
               UpdateResponse response = server.deleteById(item.id + "");
               System.out.println(response.getResponse());
           }
           server.commit();
       } catch (SolrServerException e) {
           // TODO Auto-generated catch block
           e.printStackTrace();
       } catch (IOException e) {
           // TODO Auto-generated catch block
           e.printStackTrace();
       }
   }
}


Suggest ( Auto-Complete Functionality)


1) Add the following code to ../example/solr/conf/solrconfig.xml:

    <!-- Search component named "suggest", implemented by solr.SpellCheckComponent,
         with a single dictionary that is also named "suggest". -->
    <searchComponent class="solr.SpellCheckComponent" name="suggest">
    <lst name="spellchecker">
     <str name="name">suggest</str>
     <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
     <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
     <!-- Alternatives to lookupImpl:
          org.apache.solr.spelling.suggest.fst.FSTLookup   [finite state automaton]
          org.apache.solr.spelling.suggest.fst.WFSTLookupFactory [weighted finite state automaton]
          org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
          org.apache.solr.spelling.suggest.tst.TSTLookup   [ternary trees]
     -->
     <!-- FOR SINGLE FIELD LOOKUP WORDS - DEFAULT -  -->
     <str name="field">name</str>       <!-- the indexed field to derive suggestions from -->
    <float name="threshold">0.005</float>
     <str name="buildOnCommit">true</str>
<!--
     Disabled: names a source location "american-english" for the dictionary
     (presumably an external word list; TODO confirm).
     <str name="sourceLocation">american-english</str>
-->
    </lst>
 </searchComponent>
 <!-- Request handler registered as "/suggest". Its only component is "suggest";
      the defaults switch spellcheck on and select the "suggest" dictionary,
      so a client only needs to supply the query text. -->
 <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
    <lst name="defaults">
     <str name="spellcheck">true</str>
     <str name="spellcheck.dictionary">suggest</str>
     <str name="spellcheck.onlyMorePopular">true</str>
     <str name="spellcheck.count">100</str>
     <str name="spellcheck.collate">true</str>
    </lst>
    <arr name="components">
     <str>suggest</str>
    </arr>
 </requestHandler>

2) SolrSearchSuggest.java

package search;

import java.util.List;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;
import org.apache.solr.common.params.ModifiableSolrParams;

import util.Constants;


public class SolrSearchSuggest {
   public static void main(String args[]) {

       SolrServer solr = new HttpSolrServer(Constants.SERVER_NAME);
       ModifiableSolrParams params = new ModifiableSolrParams();
       params.set("qt", "/suggest");
       params.set("q", "s");
       try {
           QueryResponse response = solr.query(params);
           System.out.println(response);
           SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse() ;
           
           List<Suggestion> suggestionList = spellCheckResponse.getSuggestions();
           if(suggestionList!=null && suggestionList.size()>0)
           {
               System.out.println("Suggestion List: ");
               for(Suggestion suggestion : suggestionList)
               {
                   List<String> alternatives = suggestion.getAlternatives() ;
                   if(alternatives!=null && alternatives.size()>0)
                   {
                       for(String alternative : alternatives)
                       {
                           System.out.println(alternative);
                       }
                   }
               }
           }
       } catch (SolrServerException e) {
           e.printStackTrace();
       }
   }
}




File Indexing


1) No configuration changes are needed for file indexing. Start the server and index the file using the following syntax.

Open Terminal:

curl "http://localhost:8983/solr/update/extract?literal.id=htmlDoc_tutorial&literal.name=htmlDoc_tutorial&commit=true" -F "myfile=@tutorial.html"

OR

Open Browser:

http://localhost:8983/solr/update/extract?literal.id=htmlDoc_tutorial&literal.name=htmlDoc_tutorial&commit=true" -F "myfile=@tutorial.html

Note: we can add as many literals as defined in the schema.xml:
id, sku, name, manu, cat, features, includes, weight, price, popularity, inStock, title, subject, description, comments, author, keywords, category, content_type, last_modified, links.

2) We can also index a file using SolrJ:

SolrFilePopulator.java

import java.io.File;
import java.io.IOException;
import java.util.Map.Entry;

import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;

import util.Constants;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.common.util.NamedList;


public class SolrFilePopulator {
   public static String fileName = "SolrJExample2.java" ;
   public static void main(String[] args) {
       try {
           SolrServer server = new HttpSolrServer(Constants.SERVER_NAME);
           ContentStreamUpdateRequest up = new ContentStreamUpdateRequest(
                   "/update/extract");
           File file = new File( "src/" +fileName);
           if(file.exists())
           {
               up.addFile(file);
               String id = fileName.substring(fileName.lastIndexOf('/') + 1);
               System.out.println(id);
   
               up.setParam("literal.id", id);
               up.setParam("literal.name", fileName);
               up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
   
               NamedList<Object> request = server.request(up);
               for (Entry<String, Object> entry : request) {
                   System.out.println(entry.getKey());
                   System.out.println(entry.getValue());
               }
           }else
           {
               System.out.println("File Does not exist at " + file.getAbsolutePath());
           }
       } catch (IOException e) {
           e.printStackTrace();
       } catch (SolrServerException e) {
           e.printStackTrace();
       }
   }
}





Note: To apply stemming and stop-word filtering, change the field type of the literal fields to
“text_en_splitting”

like
Original: <field name="name" type="text_general" indexed="true" stored="true"/>
to
  <field name="name" type="text_en_splitting" indexed="true" stored="true"/>

By doing this, searches on name ignore stop words and match the stemmed form of a word (endings such as "ing", "ed" and "s" are stripped). It also behaves somewhat like soundex matching: for example, “Pixima” is a misspelling, but if the data contains “Pixma”, that data is still returned.

0 comments:

Post a Comment

Find me on Facebook! Follow me on Twitter!