温馨提示×

温馨提示×

您好,登录后才能下订单哦!

密码登录×
登录注册×
其他方式登录
点击 登录注册 即表示同意《亿速云用户服务条款》

Lucene的Suggest怎么使用

发布时间:2021-12-23 09:18:46 来源:亿速云 阅读:112 作者:iii 栏目:大数据

这篇文章主要讲解了“Lucene的Suggest怎么使用”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“Lucene的Suggest怎么使用”吧!

Lucene 的联想词(Suggest)功能位于 org.apache.lucene.search.suggest 包下,提供自动补全和联想提示的支持。

Suggest介绍

<!-- Search suggestions: Lucene suggest module (org.apache.lucene.search.suggest) -->
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-suggest</artifactId>
	<version>7.2.1</version>
</dependency>

Suggest深入

Suggest用例

1. Controller层

@RestController
@RequestMapping(value = "/suggest")
public class SuggestController {


    @Resource
    private SuggestService suggestService;

    /**
     * 推荐词搜索
     * @param keyword
     * @return
     */
    @GetMapping(value = "/searchSuggest")
    public List<DictionaryVO> searchSuggest(String keyword) {
        return suggestService.searchSuggest(keyword);
    }


}

访问地址:
localhost:2000/spring-master/suggest/searchSuggest?keyword=胃造

2. Service层

@Slf4j
@Service
public class SuggestServiceImpl implements SuggestService {

    private AnalyzingInfixSuggester suggester;

    /**
     * 内存存储:优点速度快,缺点程序退出数据就没了
     */
    protected RAMDirectory directory;

    /**
     * 索引分词
     */
    protected StandardAnalyzer indexAnalyzer;

    /**
     * 查询分词
     */
    protected StandardAnalyzer queryAnalyzer;


    @Override
    public List<DictionaryVO> searchSuggest(String keyword) {

        List dictionaryList = new ArrayList();
        HashSet<BytesRef> contexts = new HashSet<BytesRef>();
        // 先根据region域进行suggest,再根据name域进行suggest
//        contexts.add(new BytesRef(region.getBytes("UTF8")));

        // num决定了返回几条数据,参数四表明是否所有TermQuery是否都需要满足,参数五表明是否需要高亮显示
        int num = 10;
        try {
            List<Lookup.LookupResult> results = suggester.lookup(keyword, num, true, false);

            for (Lookup.LookupResult result : results) {
                // result.key中存储的是根据用户输入内部算法进行匹配后返回的suggest内容
                log.info("result_key: " + result.key);
                // 从载荷(payload)中反序列化出Product对象(实际生产中出于降低内存占用考虑一般不会在载荷中存储这么多内容)
                BytesRef bytesRef = result.payload;
                ObjectInputStream objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytesRef.bytes));
                try {
                    DictionaryVO dictionaryVO = (DictionaryVO) objectInputStream.readObject();

                    dictionaryList.add(dictionaryVO);
                } catch (ClassNotFoundException cnfe) {
                    log.error(cnfe.getMessage());
                }
            }
        } catch (Exception e) {
            log.error(e.getMessage());
        }
        return dictionaryList;
    }

    /**
     * 初始化词典
     * @return
     */
    @PostConstruct
    protected void initSuggest() {

        directory = new RAMDirectory();
        indexAnalyzer = new StandardAnalyzer();
        queryAnalyzer = new StandardAnalyzer();

        try {
            suggester = new AnalyzingInfixSuggester(directory, indexAnalyzer, queryAnalyzer,
                    AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, false);

            long start = System.currentTimeMillis();
            // 读DictionaryVO数据
            List diseases = FileUtils.readCsv(SuggestConstants.disease);
            List facultys = FileUtils.readCsv(SuggestConstants.faculty);
            List hospitals = FileUtils.readCsv(SuggestConstants.hospital);
            List drugcatalogues = FileUtils.readCsv(SuggestConstants.drugcatalogue);
            List doctors = FileUtils.readCsv(SuggestConstants.doctor);

            List allTerms = new ArrayList();
            allTerms.addAll(facultys);
            allTerms.addAll(hospitals);
            allTerms.addAll(diseases);
            allTerms.addAll(drugcatalogues);
            allTerms.addAll(doctors);

            // 创建索引,根据InputIterator的具体实现决定数据源以及创建索引的规则
            suggester.build(new DictionaryIterator(allTerms.iterator()));
            suggester.commit();

            long end = System.currentTimeMillis();
            log.info("It takes time to initialize the dictionary:" + (end - start));

            this.initAfter();
        } catch (IOException io) {
            log.error(io.getMessage());
        }
    }

    protected void initAfter() {

    }

    /**
     * 销毁词典
     */
    @PreDestroy
    protected void destroy(){

        try {
            if(suggester != null) {
                suggester.close();
            }

            if(directory != null) {
                directory.close();
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }

        if(indexAnalyzer != null) {
            indexAnalyzer.close();
        }

        if(queryAnalyzer != null) {
            queryAnalyzer.close();
        }
        this.destroyAfter();
    }

    protected void destroyAfter(){

    }
}

3. Util

@Slf4j
public class FileUtils {

    /**
     * 读取词典csv文件
     * @param fileNamePath
     * @return
     */
    public static List<DictionaryVO> readCsv(String fileNamePath) {

        List<DictionaryVO> dictionarys = new ArrayList<>();
        try {
            // 换成你的文件名
            BufferedReader reader = new BufferedReader(new FileReader(fileNamePath));
            String line;
            while ((line = reader.readLine()) != null) {
                // CSV格式文件为逗号分隔符文件,这里根据逗号切分
                String[] item = line.split(",");
                dictionarys.add(new DictionaryVO(item[0], item[1], Long.parseLong(item[2]), Long.parseLong(item[3])));
            }
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e.getMessage());
        }
        return dictionarys;
    }
}

4. Core

package com.spring.master.lucene.suggest.core;

import com.spring.master.lucene.suggest.vo.DictionaryVO;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Set;

/**
 * @author Huan Lee
 * @version 1.0
 * @date 2020-09-10 14:55
 * @describtion 核心类:决定了你的索引是如何创建的,决定了最终返回的提示关键词列表数据及其排序
 *
 */
public class DictionaryIterator implements InputIterator {

    private Iterator<DictionaryVO> dictionaryIterator;

    private DictionaryVO currentDictionary;

    public DictionaryIterator(Iterator<DictionaryVO> dictionaryIterator) {
        this.dictionaryIterator = dictionaryIterator;
    }

    @Override
    public long weight() {
        // TODO 这里可以设置权重 return currentDictionary.getWeight();
        return 1;
    }

    /**
     * 将DictionaryVO对象序列化存入payload
     * @return
     */
    @Override
    public BytesRef payload() {
        try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            ObjectOutputStream out = new ObjectOutputStream(bos);
            out.writeObject(currentDictionary);
            out.close();
            return new BytesRef(bos.toByteArray());
        } catch (IOException e) {
            throw new RuntimeException("Well that's unfortunate.");
        }
    }

    @Override
    public boolean hasPayloads() {
        return true;
    }

    /**
     * 设置是否启用Contexts域
     * @return
     */
    @Override
    public boolean hasContexts() {
        return false;
    }

    /**
     * 获取某个term的contexts,用来过滤suggest的内容,如果suggest的列表为空,返回null
     * @return
     */
    @Override
    public Set<BytesRef> contexts() {
//        try {
//            Set<BytesRef> regions = new HashSet<>();
//            regions.add(new BytesRef(currentDictionary.getSourceType().getBytes("UTF8")));
//            return regions;
//        } catch (UnsupportedEncodingException e) {
//            throw new RuntimeException("Couldn't convert to UTF-8");
//        }
        return null;
    }

    @Override
    public BytesRef next() throws IOException {
        if (dictionaryIterator.hasNext()) {
            currentDictionary = dictionaryIterator.next();
            try {
                //返回当前Project的name值,把product类的name属性值作为key
                return new BytesRef(currentDictionary.getWord().getBytes("UTF8"));
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException("Couldn't convert to UTF-8",e);
            }
        } else {
            return null;
        }
    }
}

5. Vo

@Data
public class DictionaryVO implements Serializable {

    public DictionaryVO() {
    }

    public DictionaryVO(String word, String sourceType, Long sourceId, Long weight) {
        this.word = word;
        this.sourceId = sourceId;
        this.sourceType = sourceType;
        this.weight = weight;
    }

    /**
     * 词典
     */
    private String word;

    /**
     * 来源id
     */
    private Long sourceId;

    /**
     * 来源:Doctor、Disease、Hospital、Faculty、Drugcatalogue
     */
    private String sourceType;

    /**
     * 权重
     */
    private Long weight;
}

6. Constant

/**
 * Absolute paths of the dictionary CSV files, one constant per source.
 */
public class SuggestConstants {

    /** Directory prefix shared by all dictionary files (compile-time constant). */
    private static final String DICTIONARY_DIR = "/Users/lihuan/Documents/projects/git/me/";

    public static final String faculty = DICTIONARY_DIR + "faculty.csv";
    public static final String hospital = DICTIONARY_DIR + "hospital.csv";
    public static final String disease = DICTIONARY_DIR + "disease.csv";
    public static final String drugcatalogue = DICTIONARY_DIR + "drugcatalogue.csv";
    public static final String doctor = DICTIONARY_DIR + "doctor.csv";
}

感谢各位的阅读,以上就是“Lucene的Suggest怎么使用”的内容了,经过本文的学习后,相信大家对Lucene的Suggest怎么使用这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!

向AI问一下细节

免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。

AI