-- Sandbox table and sample rows for experimenting with MySQL FULLTEXT search.
-- To rebuild the table from scratch, run the following statement first:
-- drop table testproduct;

-- prod_name carries the FULLTEXT index that the MATCH ... AGAINST queries use.
CREATE TABLE testproduct (
    Id            VARCHAR(16),
    prod_name     TEXT,
    ProductIdType VARCHAR(8),
    PRIMARY KEY (Id),
    FULLTEXT (prod_name)
) ENGINE=MyISAM;

-- Fifteen sample rows; the second one deliberately has a NULL prod_name.
INSERT INTO testproduct (id, prod_name, productidtype) VALUES
    ('B00005N5PF', 'one pen and a good price for a pen', 'ASIN'),
    ('B570J5XS3C', NULL, 'ASIN'),
    ('C00ZZ5N5PF', 'let us get rid of some noise', 'ASIN'),
    ('D00LL5N5PA', 'four score and seven years ago our fore...', 'ASIN'),
    ('EEEZZ5N5PF', 'he has a harpoon', 'ASIN'),
    ('C01ZZ5N5PF', 'and then we', 'ASIN'),
    ('B00ZZ5N5PF', 'he has a pen in his pocket not a banana', 'ASIN'),
    ('C02ZZ5N5PF', 'went to the store', 'ASIN'),
    ('C03ZZ5N5PF', 'and decided that we should buy some', 'ASIN'),
    ('C04ZZ5N5PF', 'fruit cups or fruit or berries or pebbles', 'ASIN'),
    ('C037Z5N5PF', 'then he and her she and it', 'ASIN'),
    ('C04K95N5PF', 'threw some daggers and a harpoon', 'ASIN'),
    ('D88895N5PF', 'more noise and some of this', 'ASIN'),
    ('D34595N5PF', 'this article about harpoons really drills into the throwing of harpoon or harpoons to those that deserve a harpoon', 'ASIN'),
    ('D12395N5PF', 'and there we go', 'ASIN');

全文搜索需要多种多样的方式来消除重复的“噪音”。用最少的数据进行测试将产生较差的结果。将您的整个收藏集投入其中,以获取有意义的内容。如以下某些链接所示,甚至可以尝试搜索最小字数的设置。
停用词
MySQL 提供了多种语言的停用词列表,这些词是在搜索过程中会被忽略的无关紧要的词。该列表已编译到服务器中,但可以被覆盖,如下面引用的手册页文本所示:
要覆盖默认停用词列表,请设置ft_stopword_file系统变量。(请参见第5.1.4节“服务器系统变量”。)变量值应为包含停用词列表的文件的路径名,或为禁用停用词过滤的空字符串。除非给出绝对路径名以指定其他目录,否则服务器将在数据目录中查找文件。更改此变量的值或停用词文件的内容后,重新启动服务器并重建FULLTEXT索引。
一些示例查询
-- Single-word lookups against the FULLTEXT index on prod_name, in BOOLEAN MODE.
-- To inspect every row first, run:
-- select * from testproduct

SELECT *
FROM testproduct
WHERE MATCH(prod_name) AGAINST('score' IN BOOLEAN MODE);

SELECT *
FROM testproduct
WHERE MATCH(prod_name) AGAINST('harpoon' IN BOOLEAN MODE);

SELECT *
FROM testproduct
WHERE MATCH(prod_name) AGAINST('banana' IN BOOLEAN MODE);

SELECT *
FROM testproduct
WHERE MATCH(prod_name) AGAINST('years' IN BOOLEAN MODE);

获得多个单词匹配:
-- Score every row against a two-word BOOLEAN MODE expression and list the
-- highest scores first. There is no WHERE clause, so all rows are returned;
-- the MATCH ... AGAINST expression is used only as the relevance column.
SELECT
    id,
    prod_name,
    MATCH(prod_name) AGAINST('+harpoon +article' IN BOOLEAN MODE) AS relevance
FROM testproduct
ORDER BY relevance DESC;
下面的查询在 relevance 列中给出实际权重:
-- Same ranking query as the BOOLEAN MODE variant, but scored in
-- NATURAL LANGUAGE MODE so the relevance column holds a weighted value.
-- NOTE(review): the '+' prefixes are boolean-mode operators; natural-language
-- mode is documented as having no such operators -- confirm they are intended.
SELECT
    id,
    prod_name,
    MATCH(prod_name) AGAINST('+harpoon +article' IN NATURAL LANGUAGE MODE) AS relevance
FROM testproduct
ORDER BY relevance DESC;

-- Result rows as shown in the original text:
-- +------------+--------------------------------------------------------------------------------------------------------------------+--------------------+
-- | id         | prod_name                                                                                                          | relevance          |
-- +------------+--------------------------------------------------------------------------------------------------------------------+--------------------+
-- | D34595N5PF | this article about harpoons really drills into the throwing of harpoon or harpoons to those that deserve a harpoon | 3.6207125186920166 |
-- | EEEZZ5N5PF | he has a harpoon                                                                                                   | 1.2845110893249512 |
-- | C04K95N5PF | threw some daggers and a harpoon                                                                                   | 1.2559525966644287 |
-- +------------+--------------------------------------------------------------------------------------------------------------------+--------------------+
多个单词匹配的部分借鉴自此处。感谢 Spencer。



