|
对搜索结果里的关键字进行高亮显示。在《Lucene in Action》这本书附带的源代码里包含了 highlighter.jar 包,但是这个包对中文的支持不是很好,它会把关键字所在的那句话整句高亮。于是我个人写了一个用于高亮显示关键词的类,源代码如下。我这里用的只是简单的字符串匹配,然后做字符串替换。Java 里还有正则表达式,我也研究得比较深入,迟点将会用正则表达式来重写这个高亮的类~~
- import java.io.IOException;
- import java.io.StringReader;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.cjk.CJKAnalyzer;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.highlight.Highlighter;
- import org.apache.lucene.search.highlight.QueryScorer;
- public class Highlight {
- int beginindex;
- int i;
- int a;
- int summarylength;
- String summary;
- String lightquerystring;
- String queryString;
- QueryScorer scorer;
- Query query;
- int maxNumFragments = 2;
- String separator = "...";
- // ~
- // Constructors_-------------------------------------------------------------------
- public Highlight() {
- }
- // ~ Methods
- // --------------------------------------------------------------------------------
- /**
- * method used to highlight the keyword in the summary,a simple way to do
- * highlight job
- *
- * @throws IOException
- */
- public String Highlighter(String summary, String queryString)
- throws IOException {
- beginindex = summary.indexOf(queryString);
- summarylength = summary.length();
- lightquerystring = "<B>" + queryString + "</B>";
- if (queryString.contains(" ")) { // 有空格
- String[] subquerystring = queryString.split(" ");
- a = subquerystring.length;
- beginindex = summary.indexOf(queryString);
- for (i = 0; i < a; i++) {
- beginindex = summary.indexOf(subquerystring[i]);
- System.out.println(beginindex);
- lightquerystring = "<B>" + subquerystring[i] + "</B>";
- summary = summary.replace(subquerystring[i], lightquerystring);
- }
- StringReader sb = new StringReader(summary);
- // 按行读取已经高亮的summary,
- // 判断每行时候有关键词,有的话就添加到返回的summary
- BufferedReader br = new BufferedReader(sb);
- String str;
- for (i = 0; i < a; i++) {
- while ((str = br.readLine()) != null) {
- if (str.contains(subquerystring[i])) {
- summary = str + summary;
- }
- }
- }
- return summary;
- } else if (!queryString.contains(" ")) {// 没有空格
- if (summarylength <= 200 && summarylength >= 0) {
- summary = summary.substring(beginindex, summarylength);
- summary = summary.replace(queryString, lightquerystring);
- } else if (summarylength > 200) {
- if ((summarylength - beginindex) >= 200) {
- summary = summary.substring(beginindex, beginindex + 200);
- summary = summary.replace(queryString, lightquerystring);
- } else if ((summarylength - beginindex) < 200) {
- summary = summary.substring(beginindex, summarylength);
- summary = summary.replace(queryString, lightquerystring);
- }
- }
- return summary;
- } else {
- return lightquerystring;
- }
- }
- /**
- * method highlighter uses Highlighter.jar to highlight the keyword in the
- * summary
- *
- */
- public String Highlighter(String summary, String queryString, Query query,
- CJKAnalyzer analyzer, String contents) throws IOException {
- QueryScorer scorer = new QueryScorer(query);
- Highlighter highlighter = new Highlighter(scorer);
- TokenStream tokenStream = analyzer.tokenStream("contents",
- new StringReader(summary));
- return highlighter.getBestFragments(tokenStream, summary,
- maxNumFragments, separator);
- }
- }
复制代码
[ 本帖最后由 powerwind 于 2006-6-7 23:44 编辑 ] |
|