Document Indexing
SolrjPopulator.java
import java.io.IOException;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
import util.Constants;
/**
 * Fills the Solr index with synthetic documents, assigning the category
 * names in {@link #cat} round-robin.
 */
public class SolrjPopulator {
    /** Category names used for the generated documents. */
    // NOTE(review): "Tables Accessories" looks like a typo for a tablet
    // category; left untouched because it is part of the generated ids.
    public static String[] cat = new String[10];
    static {
        cat[0] = "Electronics";
        cat[1] = "Books";
        cat[2] = "Memory";
        cat[3] = "Mobile Accessories";
        cat[4] = "Mobile";
        cat[5] = "Computer";
        cat[6] = "Computer Accessories";
        cat[7] = "Tablets";
        cat[8] = "Tables Accessories";
        cat[9] = "Home Furnishing";
    }

    /** Total number of documents to generate. */
    private static final int DOC_COUNT = 10000;

    /** Number of added documents between two intermediate commits. */
    private static final int COMMIT_INTERVAL = 100;

    /**
     * Adds {@code DOC_COUNT} documents with the fields {@code cat},
     * {@code id} and {@code name}, committing after every
     * {@code COMMIT_INTERVAL} documents and once more at the end.
     *
     * @param args ignored
     * @throws IOException if communication with the server fails
     * @throws SolrServerException if the server reports an error
     */
    public static void main(String[] args) throws IOException,
            SolrServerException {
        SolrServer server = new HttpSolrServer(Constants.SERVER_NAME);
        try {
            for (int i = 0; i < DOC_COUNT; ++i) {
                // Derive the index from the array length so the category list
                // can grow or shrink without touching this loop.
                String category = cat[i % cat.length];
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("cat", category);
                doc.addField("id", category + "-" + i);
                doc.addField("name", "Name for " + category + " :: " + i);
                server.add(doc);
                // Commit after each full batch; the previous "i % 100 == 0"
                // test committed right after the very first document.
                if ((i + 1) % COMMIT_INTERVAL == 0) {
                    server.commit(); // periodically flush
                }
            }
            // Covers a trailing partial batch when DOC_COUNT is not a
            // multiple of COMMIT_INTERVAL.
            server.commit();
        } finally {
            // Release the HTTP client resources even when indexing fails.
            server.shutdown();
        }
    }
}
SolrJSearcher.java
import java.net.MalformedURLException;
import java.util.List;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import util.Constants;
/**
 * Queries the Solr server for all documents and prints every known field of
 * each returned document to standard output.
 */
public class SolrJSearcher {
    /**
     * Issues a match-all query for up to 20000 rows and dumps the results.
     *
     * @param args ignored
     * @throws MalformedURLException declared for callers; see the client constructor
     * @throws SolrServerException if the query fails
     */
    public static void main(String[] args) throws MalformedURLException,
            SolrServerException {
        SolrServer client = new HttpSolrServer(Constants.SERVER_NAME);

        ModifiableSolrParams queryParams = new ModifiableSolrParams();
        // Other parameters that can be set here for experimentation:
        //   q=cat:book, defType=edismax, fl=score,*, debugQuery=on
        queryParams.set("q", "*:*");
        queryParams.set("start", "0");
        queryParams.set("rows", "20000");

        QueryResponse queryResponse = client.query(queryParams);
        SolrDocumentList hits = queryResponse.getResults();

        // Union of the field names over all hits, so every document is
        // printed with the same set of keys.
        List<String> fieldNames = Constants.createKeyList(hits);
        for (SolrDocument hit : hits) {
            for (String fieldName : fieldNames) {
                System.out.println(fieldName + ": " + hit.get(fieldName));
            }
            System.out.println("-----------------------------------");
        }
    }
}
Constants.java
package util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
/**
 * Shared settings and helpers used by the SolrJ example programs.
 */
public class Constants {
    /** Base URL of the Solr server the examples talk to. */
    public static final String SERVER_NAME = "http://localhost:8983/solr";

    /**
     * Creates a new HTTP client for {@link #SERVER_NAME}.
     *
     * @return a freshly constructed client; a new instance on every call
     */
    public static HttpSolrServer getSolrServer() {
        return new HttpSolrServer(Constants.SERVER_NAME);
    }

    /**
     * Collects the distinct field names that occur in the given documents,
     * in order of first appearance.
     *
     * @param results documents to inspect; {@code null} is treated as empty
     * @return a new mutable list of distinct field names, never {@code null}
     */
    public static List<String> createKeyList(SolrDocumentList results) {
        // A LinkedHashSet keeps first-seen order while de-duplicating in
        // constant time, replacing the former List.contains scan per key.
        Set<String> keys = new LinkedHashSet<String>();
        if (results != null) {
            for (SolrDocument doc : results) {
                Map<String, Object> fieldValueMap = doc.getFieldValueMap();
                keys.addAll(fieldValueMap.keySet());
            }
        }
        return new ArrayList<String>(keys);
    }
}
Bean Indexing
Item.java
package bean;
import java.util.List;
import org.apache.solr.client.solrj.beans.Field;
/**
 * Bean whose public fields are annotated with {@code @Field} so it can be
 * passed to and read from SolrJ's bean API.
 */
public class Item {
    @Field("id")
    public String id;
    /** Bound to the field name given in the annotation, {@code cat}. */
    @Field("cat")
    public String[] categories;
    @Field
    public List<String> features;
    @Field
    public String name;
    @Field
    public String manu;
    @Field
    public Float price;
    @Field
    public int popularity;
    @Field
    public boolean inStock;
    @Field
    public Float weight;
    @Field
    public String includes;
    @Field
    public String payloads;
    @Field
    public String manu_id_s;
    @Field
    public String sku;

    /**
     * Renders all fields as one comma-separated line. Categories and
     * features are shown as bracketed, comma-separated groups; a group is
     * rendered as an empty string when its field is {@code null}.
     *
     * @return a human-readable description of this item
     */
    @Override
    public String toString() {
        String cat = "";
        if (categories != null) {
            cat = bracketed("Categories", categories);
        }
        String tempFeatures = "";
        if (features != null) {
            tempFeatures = bracketed("Features",
                    features.toArray(new String[features.size()]));
        }
        return this.id + ", "
                + this.name + ", "
                + this.manu + ", "
                + cat + ", "
                + tempFeatures + ", "
                + this.includes + ", "
                + this.payloads + ", "
                + this.popularity + ", "
                + this.manu_id_s + ", "
                + this.sku + ", "
                + this.price + ", "
                + this.weight + ", "
                + this.inStock;
    }

    /**
     * Formats values as {@code [label: v1, v2, ...]}. The values used to be
     * concatenated with no delimiter, which made the output ambiguous.
     */
    private static String bracketed(String label, String[] values) {
        StringBuilder text = new StringBuilder("[").append(label).append(": ");
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                text.append(", ");
            }
            text.append(values[i]);
        }
        return text.append("]").toString();
    }
}
BeanPopulator.java
package bean;
import java.io.IOException;
import java.util.Date;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.UpdateResponse;
import util.Constants;
/**
 * Indexes a single {@link Item} bean through SolrJ's bean API.
 */
public class BeanPopulator {
    /**
     * Builds one item whose text fields are stamped with the current time,
     * adds it to the index and commits. Failures are printed to standard
     * error and do not propagate.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SolrServer server = Constants.getSolrServer();
        // One timestamp for the whole item, so all fields agree even when
        // the clock ticks over while the bean is being built.
        Date now = new Date();
        Item item = new Item();
        item.id = "one " + now;
        item.categories = new String[] { "aaa", "bbb", "ccc" };
        item.name = "name " + now;
        item.manu = "manu " + now;
        item.price = 10.2f;
        item.popularity = 0;
        item.inStock = true;
        item.weight = 1.2f;
        item.includes = "one " + now;
        item.payloads = "one " + now;
        item.manu_id_s = "one " + now;
        item.sku = "" + now;
        try {
            UpdateResponse response = server.addBean(item);
            System.out.println(response.getResponse());
            // Commit explicitly, as the other writers in these examples do,
            // so the new bean does not depend on server-side auto-commit.
            server.commit();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SolrServerException e) {
            e.printStackTrace();
        } finally {
            // Release the HTTP client resources.
            server.shutdown();
        }
        /*
         * To index several beans at once, collect them in a List<Item> and
         * call server.addBeans(beans) instead of addBean.
         */
    }
}
BeanReader.java
package bean;
import java.io.IOException;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocumentList;
import util.Constants;
/**
 * Reads {@link Item} beans back from the index and prints them; also offers
 * a helper to delete a list of beans by id.
 */
public class BeanReader {
    /**
     * Runs the query {@code cat:*} and prints each returned bean.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SolrServer client = Constants.getSolrServer();
        SolrQuery catQuery = new SolrQuery();
        catQuery.setQuery("cat:*");
        // To sort the hits: catQuery.addSortField("price", SolrQuery.ORDER.asc);
        try {
            QueryResponse reply = client.query(catQuery);
            // Raw documents are fetched as in the original but not used further.
            SolrDocumentList rawDocs = reply.getResults();
            List<Item> items = reply.getBeans(Item.class);
            for (Item current : items) {
                print(current);
            }
            // To remove what was just read: deleteAllBeans(items, client);
        } catch (SolrServerException failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Prints every bean, deletes it from the index by its id, prints the
     * server's reply, and commits once after the last deletion. Failures
     * are printed to standard error and do not propagate.
     *
     * @param beans beans to delete
     * @param server client used for the deletions and the commit
     */
    public static void deleteAllBeans(List<Item> beans, SolrServer server) {
        try {
            for (Item doomed : beans) {
                print(doomed);
                String idText = String.valueOf(doomed.id);
                UpdateResponse reply = server.deleteById(idText);
                System.out.println(reply.getResponse());
            }
            server.commit();
        } catch (SolrServerException failure) {
            failure.printStackTrace();
        } catch (IOException failure) {
            failure.printStackTrace();
        }
    }

    /** Writes the bean's string form to standard output. */
    private static void print(Item item) {
        System.out.println(item.toString());
    }
}
Suggest (Auto-Complete Functionality)
1) Add the following code to ../example/solr/conf/solrconfig.xml:
<!-- Declares a search component named "suggest", implemented by
     solr.SpellCheckComponent, holding one spellchecker configuration that
     is also named "suggest". -->
<searchComponent class="solr.SpellCheckComponent" name="suggest">
<lst name="spellchecker">
<str name="name">suggest</str>
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
<!-- TSTLookup is the implementation selected here; the list below names the alternatives. -->
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
<!-- Alternatives to lookupImpl:
org.apache.solr.spelling.suggest.fst.FSTLookup [finite state automaton]
org.apache.solr.spelling.suggest.fst.WFSTLookupFactory [weighted finite state automaton]
org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
org.apache.solr.spelling.suggest.tst.TSTLookup [ternary trees]
-->
<!-- FOR SINGLE FIELD LOOKUP WORDS - DEFAULT - -->
<str name="field">name</str> <!-- the indexed field to derive suggestions from -->
<!-- NOTE(review): threshold presumably filters out rare terms; confirm against the Suggester documentation. -->
<float name="threshold">0.005</float>
<!-- NOTE(review): presumably rebuilds the suggestion dictionary on every commit; confirm. -->
<str name="buildOnCommit">true</str>
<!--
<str name="sourceLocation">american-english</str>
-->
</lst>
</searchComponent>
<!-- Declares a SearchHandler under the name "/suggest". Its defaults turn
     spellcheck on and select the "suggest" dictionary, and its component
     list contains only the "suggest" component. -->
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
<lst name="defaults">
<str name="spellcheck">true</str>
<str name="spellcheck.dictionary">suggest</str>
<str name="spellcheck.onlyMorePopular">true</str>
<str name="spellcheck.count">100</str>
<str name="spellcheck.collate">true</str>
</lst>
<arr name="components">
<str>suggest</str>
</arr>
</requestHandler>
2) SolrSearchSuggest.java
package search;
import java.util.List;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;
import org.apache.solr.common.params.ModifiableSolrParams;
import util.Constants;
/**
 * Sends a query to the {@code /suggest} request handler and prints the
 * alternatives it proposes.
 */
public class SolrSearchSuggest {
    /**
     * Queries {@code /suggest} with the prefix {@code s}, prints the raw
     * response and then every suggested alternative. Query failures are
     * printed to standard error and do not propagate.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        SolrServer solr = new HttpSolrServer(Constants.SERVER_NAME);
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set("qt", "/suggest");
        params.set("q", "s");
        try {
            QueryResponse response = solr.query(params);
            System.out.println(response);
            SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse();
            // The response carries no spellcheck section when the handler is
            // not configured or produced nothing; avoid dereferencing null.
            if (spellCheckResponse == null) {
                return;
            }
            List<Suggestion> suggestionList = spellCheckResponse.getSuggestions();
            if (suggestionList != null && suggestionList.size() > 0) {
                System.out.println("Suggestion List: ");
                for (Suggestion suggestion : suggestionList) {
                    List<String> alternatives = suggestion.getAlternatives();
                    if (alternatives != null && alternatives.size() > 0) {
                        for (String alternative : alternatives) {
                            System.out.println(alternative);
                        }
                    }
                }
            }
        } catch (SolrServerException e) {
            e.printStackTrace();
        } finally {
            // Release the HTTP client resources on every exit path.
            solr.shutdown();
        }
    }
}
File Indexing
1) There is no need to change any configuration for file indexing. Start the server, and then index the file using the following syntax.
Open Terminal:
curl "http://localhost:8983/solr/update/extract?literal.id=htmlDoc_tutorial&literal.name=htmlDoc_tutorial&commit=true" -F "myfile=@tutorial.html"
OR
Open Browser:
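http://localhost:8983/solr/update/extract?literal.id=htmlDoc_tutorial&literal.name=htmlDoc_tutorial&commit=true&stream.file=/path/to/tutorial.html (a browser cannot send curl's -F "myfile=@tutorial.html" upload, so the file is referenced through the stream.file parameter instead; this requires remote streaming to be enabled in solrconfig.xml)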
Note: we can add as many literals as defined in the schema.xml:
id, sku, name, manu, cat, features, includes, weight, price, popularity, inStock, title, subject, description, comments, author, keywords, category, content_type, last_modified, links.
2) We can also index a file using SolrJ:
SolrFilePopulator.java
import java.io.File;
import java.io.IOException;
import java.util.Map.Entry;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import util.Constants;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.common.util.NamedList;
/**
 * Sends a local file to the {@code /update/extract} handler so that Solr
 * indexes its content.
 */
public class SolrFilePopulator {
    /** Name of the file to index, resolved relative to {@code src/}. */
    public static String fileName = "SolrJExample2.java";

    /**
     * Uploads {@code src/<fileName>} with {@code literal.id} and
     * {@code literal.name} parameters, commits, and prints each entry of the
     * server's reply. Prints a message instead when the file is missing.
     * Failures are printed to standard error and do not propagate.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            SolrServer client = new HttpSolrServer(Constants.SERVER_NAME);
            ContentStreamUpdateRequest upload =
                    new ContentStreamUpdateRequest("/update/extract");
            File source = new File("src/" + fileName);

            // Guard clause: nothing to send when the file is not there.
            if (!source.exists()) {
                System.out.println("File Does not exist at " + source.getAbsolutePath());
                return;
            }

            upload.addFile(source);
            // The id is the file name with any leading directories removed.
            int lastSlash = fileName.lastIndexOf('/');
            String docId = fileName.substring(lastSlash + 1);
            System.out.println(docId);
            upload.setParam("literal.id", docId);
            upload.setParam("literal.name", fileName);
            upload.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

            NamedList<Object> reply = client.request(upload);
            for (Entry<String, Object> part : reply) {
                System.out.println(part.getKey());
                System.out.println(part.getValue());
            }
        } catch (IOException failure) {
            failure.printStackTrace();
        } catch (SolrServerException failure) {
            failure.printStackTrace();
        }
    }
}
Note: To apply stemming and stopword filtering, change the field type of the literal fields to
“text_en_splitting”
like
Original: <field name="name" type="text_general" indexed="true" stored="true"/>
to
<field name="name" type="text_en_splitting" indexed="true" stored="true"/>
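By doing this, when searching on name, stopwords are ignored and stemming is applied. Stemming reduces words to their root form by stripping endings such as "ing", "ed", "s", etc., while stopwords are very common words (for example "a", "an", "the") that are skipped. It also provides soundex-like matching: for example, “Pixima” is a wrong spelling, but if the data contains “Pixma”, that data is still returned.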
0 comments:
Post a Comment