黄色网页视频 I 影音先锋日日狠狠久久 I 秋霞午夜毛片 I 秋霞一二三区 I 国产成人片无码视频 I 国产 精品 自在自线 I av免费观看网站 I 日本精品久久久久中文字幕5 I 91看视频 I 看全色黄大色黄女片18 I 精品不卡一区 I 亚洲最新精品 I 欧美 激情 在线 I 人妻少妇精品久久 I 国产99视频精品免费专区 I 欧美影院 I 欧美精品在欧美一区二区少妇 I av大片网站 I 国产精品黄色片 I 888久久 I 狠狠干最新 I 看看黄色一级片 I 黄色精品久久 I 三级av在线 I 69色综合 I 国产日韩欧美91 I 亚洲精品偷拍 I 激情小说亚洲图片 I 久久国产视频精品 I 国产综合精品一区二区三区 I 色婷婷国产 I 最新成人av在线 I 国产私拍精品 I 日韩成人影音 I 日日夜夜天天综合

coreseek3.2.14全文搜索引擎安裝使用

系統(tǒng) 2270 0

下載coreseek 3.2.14

wget http://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.tar.gz

wget http://pecl.php.net/get/sphinx-1.3.2.tgz #sphinx擴(kuò)展

coreseek安裝需要預(yù)裝的軟件:

yum install make gcc gcc-c++ libtool autoconf automake imake mysql-devel libxml2-devel expat-devel

安裝xmlpipe2支持(如果上面已安裝可以不用再安裝)
[root@localhost tools]# yum -y install expat-devel*
[root@localhost tools]#tar -xzvf coreseek-3.2.14.tar.gz

[root@localhost tools]#cd coreseek-3.2.14

安裝中文分詞插件mmseg-3.2.14
[root@localhost coreseek-3.2.14]#cd mmseg-3.2.14

[root@localhost mmseg-3.2.14]# ./configure --prefix=/usr/local/mmseg3

安裝時如果出現
config.status: error: cannot find input file: src/Makefile.in

可以通過安裝autoconf和automake解決

[root@localhost mmseg-3.2.14]#yum -y install autoconf automake

[root@localhost mmseg-3.2.14]#aclocal
如果出現下面錯誤:
configure.in:26: warning: macro `AM_PROG_LIBTOOL' not found in library

[root@localhost mmseg-3.2.14]#yum -y install libtool
[root@localhost mmseg-3.2.14]#aclocal
[root@localhost mmseg-3.2.14]# libtoolize --force

libtoolize: putting auxiliary files in AC_CONFIG_AUX_DIR, `config'.
libtoolize: linking file `config/ltmain.sh'
libtoolize: Consider adding `AC_CONFIG_MACRO_DIR([m4])' to configure.in and
libtoolize: rerunning libtoolize, to keep the correct libtool macros in-tree.
libtoolize: Consider adding `-I m4' to ACLOCAL_AMFLAGS in Makefile.am.

[root@localhost mmseg-3.2.14]# automake --add-missing

[root@localhost mmseg-3.2.14]# autoconf

[root@localhost mmseg-3.2.14]# autoheader

[root@localhost mmseg-3.2.14]# make clean

[root@localhost mmseg-3.2.14]# ./configure --prefix=/usr/local/mmseg3

[root@localhost mmseg-3.2.14]# make && make install

[root@localhost mmseg-3.2.14]# ln -s /usr/local/mmseg3/bin/mmseg /bin/mmseg

接下來安裝Coreseek 3.2.14
[root@localhost mmseg-3.2.14]# cd ..

[root@localhost coreseek-3.2.14]# cd csft-3.2.14

[root@localhost csft-3.2.14]# ./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql

[root@localhost csft-3.2.14]# make && make install

安裝sphinx擴展模塊
1)安裝libsphinxclient依賴包

[root@localhost csft-3.2.14]# cd api/libsphinxclient

[root@localhost libsphinxclient]#./configure

[root@localhost libsphinxclient]# make && make install

2)安裝PHP擴(kuò)展
[root@localhost coreseek-3.2.14]# tar -xzvf sphinx-1.3.2.tgz
[root@localhost coreseek-3.2.14]# cd sphinx-1.3.2

[root@localhost sphinx-1.3.2]# /usr/local/php/bin/phpize

[root@localhost sphinx-1.3.2]# ./configure --with-php-config=/usr/local/php/bin/php-config --with-sphinx

[root@localhost sphinx-1.3.2]# make && make install
修改PHP配置文件,添加sphinx擴展
[root@localhost sphinx-1.3.2]# vi /usr/local/php/etc/php.ini
添加extension=sphinx.so
保存退出
重啟PHP
/etc/init.d/php-fpm restart

測試coreseek中文分詞
cd coreseek-3.2.14/testpack/
/usr/local/mmseg3/bin/mmseg -d /usr/local/mmseg3/etc var/test/test.xml

顯示
Word Splite took: 35 ms.

[root@kekejia testpack]#/usr/local/coreseek/bin/indexer -c etc/csft.conf --all
[root@kekejia testpack]# /usr/local/coreseek/bin/search -c etc/csft.conf 網絡搜索

Coreseek Fulltext 3.2 [ Sphinx 0.9.9-release (r2117)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

?using config file 'etc/csft.conf'...
index 'xml': query '網(wǎng)絡(luò)搜索 ': returned 1 matches of 1 total in 0.001 sec

displaying matches:
1. document=1, weight=1, published=Thu Apr? 1 22:20:07 2010, author_id=1

words:
1. '網(wǎng)絡(luò)': 1 documents, 1 hits
2. '搜索': 2 documents, 5 hits

測試完畢,下面是一些錯誤提示以及相應的解決方法:
錯誤一:
/usr/local/mmseg3/include/mmseg/mmthunk.h: In member function 'u2 css::ChunkQueue::getToken()':
/usr/local/mmseg3/include/mmseg/mmthunk.h:143: warning: comparison between signed and unsigned integer expressions
/usr/local/mmseg3/include/mmseg/mmthunk.h:161: warning: comparison between signed and unsigned integer expressions
sphinx.cpp: In member function 'virtual void CSphIndex_VLN::DebugDumpDocids(FILE*)':
sphinx.cpp:15267: warning: format '%d' expects type 'int', but argument 3 has type 'long unsigned int'
sphinxstd.h: In member function 'bool CSphHTMLStripper::SetRemovedElements(const char*, CSphString&)':
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
sphinxstd.h: In member function 'bool CSphHTMLStripper::SetIndexedAttrs(const char*, CSphString&)':
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
sphinxstd.h: In member function 'virtual bool CSphIndex_VLN::GetKeywords(CSphVector<CSphKeywordInfo>&, const char*, bool)':
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
sphinxstd.h: In member function 'virtual int CSphIndex_VLN::Build(const CSphVector<CSphSource*>&, int, int)':
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
sphinxstd.h:579: warning: assuming signed overflow does not occur when assuming that (X + c) >= X is always true
make[2]: *** [sphinx.o] Error 1
make[2]: Leaving directory `/root/tools/coreseek-3.2.14/csft-3.2.14/src'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/root/tools/coreseek-3.2.14/csft-3.2.14/src'
make: *** [all-recursive] Error 1

官網(wǎng)解決辦法:

In the meantime I've change the configuration file and set
#define USE_LIBICONV 0 in line 8179.
修改 configure 文件,把 #define USE_LIBICONV 1 最后的數值由1改為0(即 #define USE_LIBICONV 0),然后重新編譯:
make && make install

錯(cuò)誤二:
make[2]: *** [indexer] Error 1
make[2]: Leaving directory `/www/tmp/csft-3.1/src'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/www/tmp/csft-3.1/src'
make: *** [all-recursive] Error 1

解決辦法

vi ./src/sphinx.cpp

注釋以下代碼(sphinx.cpp 是 C++ 源文件,需用 // 注釋,不能用 #)

//case TOKENIZER_ZHCN_GBK:
//pTokenizer = sphCreateGBKChineseTokenizer
//(tSettings.m_sDictPath.cstr(), tSettings.m_nBest); break;
然后重新編譯
make clean
make && make install

錯(cuò)誤三:索引不生成
/usr/local/coreseek/bin/indexer -c etc/csft.conf -all
改為
/usr/local/coreseek/bin/indexer -c etc/csft.conf --all

下面開始配置coreseek
cd /usr/local/coreseek/etc
cp sphinx-min.conf.dist csft.conf
vi csft.conf

#
# Minimal Sphinx configuration sample (clean, simple, functional)
#

# MySQL data source feeding the "test1" index.
source test1
{
        type                    = mysql

        sql_host                = localhost
        sql_user                = root
        sql_pass                = root
        sql_db                  = test
        sql_port                = 3306  # optional, default is 3306

        # Statement executed before the main query. The data must reach the
        # indexer as UTF-8, so the connection charset is switched here.
        sql_query_pre           = SET NAMES utf8

        # Main query that fetches the rows to be indexed.
        sql_query               = \
                SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
                FROM documents

        # Unsigned-integer attribute; filter on it with SetFilter() or
        # SetFilterRange(). (sql_attr_float declares a floating-point attribute.)
        sql_attr_uint           = group_id

        # Timestamp attribute, commonly used for sorting.
        sql_attr_timestamp      = date_added

        # Row lookup used by the command-line search tool to print the matched
        # documents while debugging. It has no effect on API queries, which
        # only return document ids and attributes.
        sql_query_info          = SELECT * FROM documents WHERE id=$id
}


# Index definition for "test1": single-character (unigram) indexing of CJK text.
index test1
{
        # Data source this index is built from.
        source                  = test1

        # Index type; plain is the default. distributed is the other type
        # available here (rt indexes belong to later Sphinx releases).
        type                    = plain

        # Path prefix of the index files.
        path                    = /usr/local/coreseek/var/data/test1

        # Attribute (docinfo) storage mode: none, extern or inline; default extern.
        # extern keeps attribute values in a separate .spa file that searchd
        # loads into memory at startup; inline stores them inside the .spd
        # document lists next to the document ids.
        docinfo                 = extern

        # Directory holding the mmseg dictionary; uni.lib must exist there.
        # After replacing or editing the dictionary, rebuild the index and
        # restart searchd for the change to take effect.
        charset_dictpath        = /usr/local/mmseg3/etc/

        # Character set type: sbcs or utf-8. Coreseek additionally accepts
        # zh_cn.utf-8, zh_cn.gbk and zh_cn.big5, which turn on mmseg word
        # segmentation. With plain utf-8 the stock Sphinx tokenizer is used
        # together with the n-gram settings below.
        # NOTE(review): confirm the intended mode against the Coreseek manual.
        charset_type            = utf-8

        # Alternative minimal settings (disabled):
        # characters accepted as valid by the unigram mode
        #charset_table          = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
        # characters subject to unigram splitting
        #ngram_chars            = U+3000..U+2FA1F

        charset_table           = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,\
                                  A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,\
                                  U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,\
                                  U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,\
                                  U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,\
                                  U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, \
                                  U+0116->U+0117,U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D,\
                                  U+011D,U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, \
                                  U+0134->U+0135,U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, \
                                  U+013C,U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, \
                                  U+0143->U+0144,U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, \
                                  U+014B,U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, \
                                  U+0152->U+0153,U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159,\
                                  U+0159,U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, \
                                  U+0160->U+0161,U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, \
                                  U+0167,U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, \
                                  U+016E->U+016F,U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175,\
                                  U+0175,U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, \
                                  U+017B->U+017C,U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, \
                                  U+0430..U+044F,U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, \
                                  U+0621..U+063A, U+01B9,U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, \
                                  U+0671..U+06D3, U+06F0..U+06FF,U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, \
                                  U+0966..U+096F, U+097B..U+097F,U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, \
                                  U+0A05..U+0A39, U+0A59..U+0A5E,U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, \
                                  U+0AE6..U+0AEF, U+0B05..U+0B39,U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, \
                                  U+0BE6..U+0BF2, U+0C05..U+0C39,U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, \
                                  U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, \
                                  U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,U+A807..U+A822, U+0386->U+03B1, \
                                  U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,U+0389->U+03B7, U+03AE->U+03B7, \
                                  U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,U+03AF->U+03B9, U+03CA->U+03B9, \
                                  U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,U+03AB->U+03C5, U+03B0->U+03C5, \
                                  U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,U+03CE->U+03C9, U+03C2->U+03C3, \
                                  U+0391..U+03A1->U+03B1..U+03C1,U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, \
                                  U+03C3..U+03C9, U+0E01..U+0E2E,U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, \
                                  U+A000..U+A48F, U+4E00..U+9FBF,U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, \
                                  U+2F800..U+2FA1F, U+2E80..U+2EFF,U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, \
                                  U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, \
                                  U+3130..U+318F, U+A000..U+A48F,U+A490..U+A4CF

        ngram_chars             = U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,\
                                  U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,\
                                  U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF,\
                                  U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF
        min_prefix_len          = 0
        min_infix_len           = 1

        min_word_len            = 1

        # Must be set. 0 turns the unigram mode off so that it does not
        # interfere with mmseg word segmentation; 1 turns it on so that every
        # single CJK character is indexed on its own.
        # Source data must be UTF-8 encoded, otherwise it is not processed
        # correctly.
        ngram_len               = 1

        # Morphology preprocessor. Example: in English "dogs" is the plural of
        # "dog", so a stemmer indexes both as the same word.
        #morphology             = none
        # Minimum word length at which stemming is applied.
        #min_stemming_len       = 1
        # Whether the original word is indexed in addition to its stemmed form.
        #index_exact_words      = 1
        # Stop words: words listed in this file are not indexed.
        #stopwords              = /usr/local/coreseek/var/data/stopwords.txt
        # Custom word-forms dictionary.
        #wordforms              = /usr/local/coreseek/var/data/wordforms.txt
        # Tokenizing exceptions: treat a special token as another word,
        # e.g. c++ => cplusplus.
        #exceptions             = /usr/local/coreseek/var/data/exceptions.txt
        # Minimum indexed word length; shorter words are not indexed.
        #min_word_len           = 1
        # Star-syntax wildcards for prefix/infix indexes; default 0 (off).
        # Used together with min_prefix_len, min_infix_len, prefix_fields and
        # infix_fields.
        #enable_star            = 0
        # Minimum prefix length to index.
        #min_prefix_len         = 0
        # Minimum infix length to index.
        #min_infix_len          = 0
        # Fields limited to prefix indexing.
        #prefix_fields          = filename
        # Fields limited to infix indexing.
        #infix_fields           = url, domain
        # Whether HTML markup is stripped from the incoming full-text data.
        #html_strip             = 0
        # HTML attributes whose values are indexed even when markup is stripped.
        #html_index_attrs       = img=alt,title; a=title;
        # HTML elements whose contents are removed together with the element.
        #html_remove_elements   = style, script
        # Whether searchd opens the index files up front instead of per query.
        #preopen                = 1
}


# Settings for the indexer program.
indexer
{
        # Memory limit used while building an index.
        mem_limit               = 32M

        # Maximum I/O operations per second, for I/O throttling.
        #max_iops               = 40
        # Maximum size of a single I/O operation in bytes, for I/O throttling.
        #max_iosize             = 1048576
        # Maximum field size allowed for the xmlpipe2 data source.
        #max_xmlpipe2_field     = 4M
        # Write buffer size.
        #write_buffer           = 1M
        # Maximum buffer size available for file fields.
        #max_file_field_buffer  = 32M
}


# Settings for the searchd daemon.
searchd
{
        port                    = 9312

        # Daemon log file.
        log                     = /usr/local/coreseek/var/log/searchd.log
        # Query log file.
        query_log               = /usr/local/coreseek/var/log/query.log
        # Client read timeout, in seconds.
        read_timeout            = 5
        # Maximum number of searches executed in parallel.
        max_children            = 30
        # Process id file.
        pid_file                = /usr/local/coreseek/var/log/searchd.pid
        # Maximum number of matches the daemon keeps in memory per index and
        # returns to the client.
        max_matches             = 1000
        # Seamless rotation: keeps searchd responsive while it rotates indexes
        # that need a lot of data to be precached.
        seamless_rotate         = 1
        # Whether all index files are forcibly pre-opened at startup.
        preopen_indexes         = 0
        # Whether the .old index copies are deleted after a successful rotation.
        unlink_old              = 1

        # Maximum allowed network packet size.
        #max_packet_size        = 8M
        # Crash log file path.
        #crash_log_path         = /usr/local/coreseek/var/log/crash
        # Maximum number of filters allowed per query.
        #max_filters            = 256
        # Maximum number of values allowed in a single filter.
        #max_filter_values      = 4096
        # TCP listen backlog length.
        #listen_backlog         = 5
        # Per-keyword read buffer size.
        #read_buffer            = 256K
        # Unhinted read size.
        #read_unhinted          = 32K
        # Maximum number of queries in one batch.
        #max_batch_queries      = 32
        # Number of threads used for concurrent query processing.
        #dist_threads           = 4
        # Binary log path.
        #binlog_path            = /usr/local/coreseek/var/data/
        # Binary log size limit.
        #binlog_max_log_size    = 256M
        # Per-thread stack size.
        #thread_stack           = 128K
        # Keyword expansion limit.
        #expansion_limit        = 1000
        # RT index flush period.
        #rt_flush_period        = 900
        # Query log format: plain (default) or sphinxql.
        #query_log_format       = sphinxql
        # Plugin directory.
        #plugin_dir             = /usr/local/sphinx/lib
        # Default server collation.
        #collation_server       = utf8_general_ci
        # Watchdog process for threaded mode.
        #watchdog               = 1
        # Compatibility mode.
        #compat_sphinxql_magics = 1
}

創(chuàng)建索引
[root@kekejia bin]# /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf test1
Coreseek Fulltext 3.2 [ Sphinx 0.9.9-release (r2117)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

?using config file '/usr/local/coreseek/etc/csft.conf'...
indexing index 'test1'...
collected 1431 docs, 0.1 MB
sorted 0.0 Mhits, 100.0% done
total 1431 docs, 57482 bytes
total 0.082 sec, 697427 bytes/sec, 17362.29 docs/sec
total 1 reads, 0.000 sec, 54.0 kb/call avg, 0.0 msec/call avg
total 5 writes, 0.000 sec, 30.7 kb/call avg, 0.0 msec/call avg

運(yùn)行coreseek服務(wù)進(jìn)程searchd
[root@kekejia bin]# /usr/local/coreseek/bin/searchd -c /usr/local/coreseek/etc/csft.conf

Coreseek Fulltext 3.2 [ Sphinx 0.9.9-release (r2117)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

using config file '/usr/local/coreseek/etc/csft.conf'...
listening on all interfaces, port=9312

測(cè)試:默認(rèn)顯示20條結(jié)果
[root@kekejia bin]# /usr/local/coreseek/bin/search -i test1 技巧
Coreseek Fulltext 3.2 [ Sphinx 0.9.9-release (r2117)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

?using config file '/usr/local/coreseek/etc/csft.conf'...
index 'test1': query '技巧 ': returned 30 matches of 30 total in 0.003 sec

displaying matches:
1. document=37, weight=1
2. document=201, weight=1
3. document=202, weight=1
4. document=250, weight=1
5. document=285, weight=1
6. document=286, weight=1
7. document=356, weight=1
8. document=360, weight=1
9. document=393, weight=1
10. document=439, weight=1
11. document=493, weight=1
12. document=494, weight=1
13. document=527, weight=1
14. document=711, weight=1
15. document=732, weight=1
16. document=737, weight=1
17. document=790, weight=1
18. document=791, weight=1
19. document=869, weight=1
20. document=915, weight=1

words:
1. '技巧': 30 documents, 30 hits

接下來把重建索引的命令(indexer)放在crontab里定時運行,--rotate 會通知正在運行的searchd切換到新索引
*/5 * * * * /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.conf --rotate --all >/dev/null 2>&1

至此服務(wù)器部分安裝完成

接下來在thinkphp里進(jìn)行搜索
/**
 * Search controller: queries the Coreseek/Sphinx daemon through the PHP
 * sphinx extension, loads the matching rows from the database and renders
 * them with the keyword highlighted.
 */
class SearchAction extends Action
{
    /** @var SphinxClient client pointed at the local searchd */
    protected $sp;

    /**
     * Creates the Sphinx client and points it at searchd on localhost:9312.
     * Presumably invoked by the framework's controller bootstrap - confirm.
     */
    protected function _initialize(){
        $this->sp = new SphinxClient();
        $this->sp->SetServer("localhost",9312);
    }

    /**
     * Search entry point.
     *
     * @param string $type   index name (test1)
     * @param string $word   search keyword
     * @param int    $page   page number, 1-based; values below 1 are treated as 1
     * @param int    $length results per page; values below 1 are treated as 1
     * @return array|null the Sphinx result array, or null when an argument is
     *                    empty or the query fails
     */
    protected function _search($type,$word,$page=1,$length=10){
        if(empty($type)||empty($word)){
            return;
        }

        // Clamp paging so that page 0 or a negative page cannot produce a
        // negative offset.
        $page   = max(1, (int)$page);
        $length = max(1, (int)$length);

        // Sorting, if needed:
        //$this->sp->SetSortMode(SPH_SORT_EXTENDED,'time desc'); // sort by the given expression
        //$this->sp->SetSortMode(SPH_SORT_RELEVANCE);            // sort by relevance
        $this->sp->SetLimits(($page-1)*$length, $length);

        $rs = $this->sp->query($word,$type);
        if(!$rs){
            return;
        }
        $this->sp->Close();
        return $rs;
    }

    /**
     * Joins the document ids (the keys of the "matches" array) of a search
     * result into a comma-separated string.
     *
     * Shape of the expected input:
     *   [matches] => Array
     *       (
     *           [1596] => Array ( [weight] => 1, [attrs] => Array ( ) )
     *           [1989] => Array ( [weight] => 1, [attrs] => Array ( ) )
     *       )
     *
     * @param array $value the "matches" array keyed by document id
     * @return string|null ids joined by commas, or null when $value is empty
     *                     or not an array
     */
    protected function _searchValueCombine($value){
        if(!$value||!is_array($value)){
            return;
        }
        return implode(',', array_keys($value));
    }

    /**
     * Handles a search request: reads "page" and "key_word" from the query
     * string, runs the search on index test1, loads the matching rows and
     * assigns them (plus pagination markup when needed) to the template.
     */
    function search(){
        $page    = (int)I('get.page',1); // page number
        $keyword = I('get.key_word');    // keyword
        if(empty($keyword)){
            $this->error();
            return;
        }

        $length = 16;
        $result = $this->_search('test1', $keyword,$page,$length);

        // A failed query returns null and a query without hits carries no
        // "matches"; normalise both so the code below never indexes null.
        $matches = (is_array($result) && !empty($result['matches'])) ? $result['matches'] : array();
        $total   = (is_array($result) && isset($result['total'])) ? $result['total'] : 0;

        if($total>$length){ // pagination
            $pg = new Page($total,$length,'',"/search/$keyword?page=__PAGE__");
            $purl = $pg->show();
            $this->assign('page',$purl);
        }

        $search_result = array();
        $ids = $this->_searchValueCombine($matches);
        // Only hit the database when there are ids; "id IN ()" is invalid SQL.
        if($ids !== null && $ids !== ''){
            $s = D('test');
            $rows = $s->field('id,name')->where("id IN ($ids)")->select();
            if(is_array($rows)){
                $search_result = $rows;
            }
        }

        // Highlighting
        if(!empty($search_result)){
            // NOTE(review): field() above selects only id,name, yet the
            // highlighting reads 'title' - confirm which column is intended.
            $keys  = array_keys($search_result);
            $title = array();
            foreach ($keys as $k){
                $title[] = isset($search_result[$k]['title']) ? $search_result[$k]['title'] : '';
            }
            $opt = array();
            $opt['before_match'] = '<b><font style="color:red;">'; // inserted before a match
            $opt['after_match'] = '</font></b>';                   // inserted after a match
            // Sphinx built-in excerpt/highlight call; returns false on failure.
            $excerpts = $this->sp->BuildExcerpts($title, 'test1', $keyword,$opt);
            if(is_array($excerpts)){
                foreach ($keys as $i => $k){
                    if(isset($excerpts[$i])){
                        $search_result[$k]['title'] = $excerpts[$i];
                    }
                }
            }
        }

        $this->assign('result',$search_result);
        $this->display();
    }
}

coreseek3.2.14全文搜索引擎安裝使用


更多文章、技術(shù)交流、商務(wù)合作、聯(lián)系博主

微信掃碼或搜索:z360901061

微信掃一掃加我為好友

QQ號(hào)聯(lián)系: 360901061

您的支持是博主寫作最大的動(dòng)力,如果您喜歡我的文章,感覺我的文章對(duì)您有幫助,請(qǐng)用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點(diǎn)擊下面給點(diǎn)支持吧,站長非常感激您!手機(jī)微信長按不能支付解決辦法:請(qǐng)將微信支付二維碼保存到相冊(cè),切換到微信,然后點(diǎn)擊微信右上角掃一掃功能,選擇支付二維碼完成支付。

【本文對(duì)您有幫助就好】

您的支持是博主寫作最大的動(dòng)力,如果您喜歡我的文章,感覺我的文章對(duì)您有幫助,請(qǐng)用微信掃描上面二維碼支持博主2元、5元、10元、自定義金額等您想捐的金額吧,站長會(huì)非常 感謝您的哦!!!

發(fā)表我的評(píng)論
最新評(píng)論 總共0條評(píng)論