/**
 * Source of synonyms for a single term.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms of the given term.
     *
     * @param key the term to look up
     * @return the synonyms of {@code key}; callers in this file treat both
     *         {@code null} and an empty array as "no synonyms"
     */
    String[] getSynonyms(String key);
}
public class SynonymEngineImpl implements SynonymEngine { private static HashMap<String,String[]> map = new HashMap<String ,String[]>(); static { map.put("quick",new String[]{"fast","speedy"}); map.put("jumps",new String[]{"leaps","hops"}); map.put("over",new String[]{"above"}); map.put("lazy",new String[]{"apathetic","sluggish"}); map.put("dog",new String[]{"canine","pooch"}); } @Override public String[] getSynonyms(String key) { // TODO Auto-generated method stub return map.get(key); } }
public class SynonymFilter extends TokenFilter { private SynonymEngine engine; private CharTermAttribute ct; private PositionIncrementAttribute pt; private Stack<String> stack; private AttributeSource.State current; protected SynonymFilter(TokenStream input,SynonymEngine engine) { super(input); this.engine = engine; ct = this.addAttribute(CharTermAttribute.class); pt = this.addAttribute(PositionIncrementAttribute.class); stack = new Stack<String>(); } @Override public boolean incrementToken() throws IOException { if(stack.size()>0) { this.restoreState(current); String p = stack.pop(); ct.setEmpty(); ct.append(p); pt.setPositionIncrement(0); return true; } System.out.println("++++++"+ct); if(!input.incrementToken()) return false; System.out.println("------"+ct); if(addSynonym(ct.toString())) { current = this.captureState(); } return true; } private boolean addSynonym(String name) { String[] sa = engine.getSynonyms(name); if(sa != null && sa.length>0) { for(String s:sa) { stack.push(s); } return true; } else { return false; } } }
public class SynonymAnalyzer extends Analyzer { private SynonymEngine engine; public SynonymAnalyzer(SynonymEngine engine) { this.engine = engine; } @Override public TokenStream tokenStream(String s, Reader reader) { // TODO Auto-generated method stub return new SynonymFilter(new StopFilter(Version.LUCENE_35, new LowerCaseFilter(Version.LUCENE_35, new StandardFilter(Version.LUCENE_35, new StandardTokenizer(Version.LUCENE_35,reader))) ,StopAnalyzer.ENGLISH_STOP_WORDS_SET),engine); } }
public class TestSynonym { private RAMDirectory directory; @Test public void init() { directory = new RAMDirectory(); SynonymEngine engine = new SynonymEngineImpl(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new SynonymAnalyzer(engine)); String content = "The quick brown fox jumps over the lazy dog"; try { IndexWriter writer = new IndexWriter(directory,config); Document doc = new Document(); doc.add(new Field("content",content,Field.Store.YES,Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(new TermQuery(new Term("content","pooch")),10); for(ScoreDoc sd:docs.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(d.get("content")); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。