布隆过滤器:一种数据结构,是由一串很长的二进制向量组成,可以将其看成一个二进制数组。既然是二进制,那么里面存放的不是0,就是1,初始默认值都是0。
2.google guava 实现使用场景:在大数据量集合中,如何准确快速的判断某个数据是否存在,并且不占用大量内存。
比如,在一亿个手机号码中,判断某个手机号是否存在。
com.google.guava guava 28.0-jre
@GetMapping("/test")
public void test(@RequestParam("total") int total,
@RequestParam(value = "fpp", required = false) String fpp) {
BloomFilter bloomFilter;
if (fpp == null) {
// 默认误差是:0.03
bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), total);
} else {
bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), total, Float.parseFloat(fpp));
}
Set set = new HashSet<>(1000);
List list = new ArrayList<>(1000);
// 布隆过滤器添加total个数据,比如100w
for (int i = 0; i < total; i++) {
String uuid = UUID.randomUUID().toString();
if (i < 1000) {
set.add(uuid);
list.add(uuid);
}
bloomFilter.put(uuid);
}
// 模拟10w个数据,其中1000个是布隆过滤器已记录的
int number = total / 10;
int right = 0; // 布隆过滤器正确次数
int wrong = 0; // 布隆过滤器误判次数
for (int i = 0; i < number; i++) {
int index = number / 1000;
String str = i % (index) == 0 ? list.get(i / index) : UUID.randomUUID().toString();
if (bloomFilter.mightContain(str)) {
if (set.contains(str)) {
right++;
} else {
wrong++;
}
}
}
System.out.println("布隆过滤器容器大小:" + total);
System.out.println("模拟数据量:" + number);
System.out.println("正确次数:" + right);
System.out.println("误判次数:" + wrong);
NumberFormat numberFormat = NumberFormat.getInstance();
numberFormat.setMaximumFractionDigits(4);
System.out.println("误判率:" + numberFormat.format((float) wrong / (float) number));
}
3.自定义实现http://localhost:8080/test?total=100000
http://localhost:8080/test?total=1000000
http://localhost:8080/test?total=100000&fpp=0.01
http://localhost:8080/test?total=1000000&fpp=0.01
package com.yzm.redis11.bloom;
public abstract class MyBloomFilter {
//用来生成不同的hash值,可以随便给,但别给奇数
protected final int[] ints = {8, 22, 32, 66, 88, 128};
//布隆过滤器中数据个数
protected Integer count = 0;
//数据量
public abstract Integer count();
//清空数据
public abstract void clear();
//添加数据
public abstract void push(Object key);
//判断key是否存在,true不一定说明key存在,但是false一定说明不存在
public abstract boolean contains(Object key);
//hash算法
public abstract int hash(Object key, int i);
}
package com.yzm.redis11.bloom;
import java.util.BitSet;
public class DefaultBloomFilter extends MyBloomFilter{
//bit集合,用来存放结果
private final int DEFAULT_SIZE = Integer.MAX_VALUE;
private final BitSet bitSet = new BitSet(DEFAULT_SIZE);
public DefaultBloomFilter() {
}
public Integer count() {
return count;
}
public void clear() {
bitSet.clear();
count = 0;
}
public void push(Object key) {
for (int i : ints) {
bitSet.set(hash(key, i));
}
count++;
}
public boolean contains(Object key) {
for (int i : ints) {
boolean exist = bitSet.get(hash(key, i));
if (!exist) return false;
}
return true;
}
public int hash(Object key, int i) {
int h;
int index = key == null ? 0 : (DEFAULT_SIZE - 1 - i) & ((h = key.hashCode()) ^ (h >>> 16));
//bitSet下标不能小于0
return index > 0 ? index : -index;
}
}
@GetMapping("/test2")
public void test2(@RequestParam("total") int total) {
MyBloomFilter bf = new DefaultBloomFilter();
bf.clear();
Set set = new HashSet<>(1000);
List list = new ArrayList<>(1000);
//向布隆过滤器中填充数据
for (int i = 0; i < total; i++) {
String uuid = UUID.randomUUID().toString();
if (i < 1000) {
set.add(uuid);
list.add(uuid);
}
bf.push(uuid);
}
int number = total / 10;
int right = 0; // 布隆过滤器正确次数
int wrong = 0; // 布隆过滤器误判次数
for (int i = 0; i < number; i++) {
int index = number / 1000;
String str = i % (index) == 0 ? list.get(i / index) : UUID.randomUUID().toString();
if (bf.contains(str)) {
if (set.contains(str)) {
right++;
} else {
wrong++;
}
}
}
System.out.println("布隆过滤器容器大小:" + total);
System.out.println("模拟数据量:" + number);
System.out.println("正确次数:" + right);
System.out.println("误判次数:" + wrong);
NumberFormat numberFormat = NumberFormat.getInstance();
numberFormat.setMaximumFractionDigits(4);
System.out.println("误判率:" + numberFormat.format((float) wrong / (float) number));
}
http://localhost:8080/test2?total=100000 (3次)
http://localhost:8080/test2?total=1000000 (3次)
package com.yzm.redis11.bloom;
import org.springframework.data.redis.core.StringRedisTemplate;
public class RedisBloomFilter extends MyBloomFilter {
private final String bloom = "y_bloom";
private final StringRedisTemplate stringRedisTemplate;
public RedisBloomFilter(StringRedisTemplate stringRedisTemplate) {
//向布隆过滤器中填充数据
this.stringRedisTemplate = stringRedisTemplate;
}
public Integer count() {
return count;
}
public void clear() {
stringRedisTemplate.delete(bloom);
count = 0;
}
public void push(Object key) {
for (int i : ints) {
stringRedisTemplate.opsForValue().setBit(bloom, hash(key, i), true);
}
count++;
}
public boolean contains(Object key) {
for (int i : ints) {
Boolean exist = stringRedisTemplate.opsForValue().getBit(bloom, hash(key, i));
if (exist != null && !exist) return false;
}
return true;
}
public int hash(Object key, int i) {
int h;
int index = key == null ? 0 : (Integer.MAX_VALUE - 1 - i) & ((h = key.hashCode()) ^ (h >>> 16));
// offset偏移量是正整数
return index > 0 ? index : -index;
}
}
MyBloomFilter bf;
Set set;
List list;
@GetMapping("/test3")
public void test3(@RequestParam("total") int total, boolean first) {
// 向redis添加数据比较久,就第一次添加,一般都是提前添加好并日常积累
if (first) {
bf = new RedisBloomFilter(stringRedisTemplate);
set = new HashSet<>(1000);
list = new ArrayList<>(1000);
bf.clear();
//向布隆过滤器中填充数据
for (int i = 0; i < total; i++) {
String uuid = UUID.randomUUID().toString();
if (i < 1000) {
set.add(uuid);
list.add(uuid);
}
bf.push(uuid);
}
}
int number = total / 10;
int right = 0; // 布隆过滤器正确次数
int wrong = 0; // 布隆过滤器误判次数
for (int i = 0; i < number; i++) {
int index = number / 1000;
String str = i % (index) == 0 ? list.get(i / index) : UUID.randomUUID().toString();
if (bf.contains(str)) {
if (set.contains(str)) {
right++;
} else {
wrong++;
}
}
}
System.out.println("布隆过滤器容器大小:" + total);
System.out.println("模拟数据量:" + number);
System.out.println("正确次数:" + right);
System.out.println("误判次数:" + wrong);
NumberFormat numberFormat = NumberFormat.getInstance();
numberFormat.setMaximumFractionDigits(4);
System.out.println("误判率:" + numberFormat.format((float) wrong / (float) number));
}
4. redisbloom 实现http://localhost:8080/test3?total=100000&first=true
http://localhost:8080/test3?total=100000&first=false (2次)
http://localhost:8080/test3?total=1000000&first=false (3次)
首先需要在Redis服务器上安装 redisbloom
下载地址:https://gitcode.net/mirrors/RedisLabsModules/redisbloom/-/releases
wget https://github.com/RedisLabsModules/rebloom/archive/v2.2.9.tar.gz
tar -zxvf v2.2.9.tar.gz
cd RedisBloom-2.2.9/
make
看到:redisbloom.so
redis.conf 配置redisbloom.so路径
vim ./etc/redis.conf
loadmodule /usr/local/redis/RedisBloom-2.2.9/redisbloom.so
启动redis
./bin/redis-server ./etc/redis.conf
package com.yzm.redis11.bloom;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.data.redis.core.script.DefaultRedisscript;
import org.springframework.data.redis.core.script.Redisscript;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.List;
@Component
public class RedisBloom {
private final StringRedisTemplate redisTemplate;
public RedisBloom(StringRedisTemplate redisTemplate) {
this.redisTemplate = redisTemplate;
}
private static Redisscript reserve = new DefaultRedisscript<>("return redis.call('bf.reserve', KEYS[1], ARGV[1], ARGV[2])", Boolean.class);
private static Redisscript add = new DefaultRedisscript<>("return redis.call('bf.add', KEYS[1], ARGV[1])", Boolean.class);
private static Redisscript exists = new DefaultRedisscript<>("return redis.call('bf.exists', KEYS[1], ARGV[1])", Boolean.class);
private static String mAdd = "return redis.call('bf.madd', KEYS[1], %s)";
private static String mExists = "return redis.call('bf.mexists', KEYS[1], %s)";
public Boolean reserve(String key, String errorRate, String initialSize) {
return redisTemplate.execute(reserve, Arrays.asList(key), errorRate, initialSize);
}
public Boolean add(String key, String value) {
return redisTemplate.execute(add, Arrays.asList(key), value);
}
public Boolean exists(String key, String value) {
return redisTemplate.execute(exists, Arrays.asList(key), value);
}
public List mAdd(String key, String... values) {
return (List) redisTemplate.execute(this.generatescript(mAdd, values), Arrays.asList(key), values);
}
public List mExists(String key, String... values) {
return (List) redisTemplate.execute(this.generatescript(mExists, values), Arrays.asList(key), values);
}
private Redisscript generatescript(String script, String[] values) {
StringBuilder sb = new StringBuilder();
for (int i = 1; i <= values.length; i++) {
if (i != 1) {
sb.append(",");
}
sb.append("ARGV[").append(i).append("]");
}
return new DefaultRedisscript<>(String.format(script, sb), List.class);
}
}
@GetMapping("/test4")
public void test4() {
String key = "y_test";
// 配置错误率和存储空间
System.err.println(redisBloom.reserve(key, "0.01", "10000"));
// 添加元素
System.err.println(redisBloom.add(key, "周星驰"));
// 判断元素是否存在
System.err.println(redisBloom.exists(key, "周星星"));
// 批量添加元素
System.err.println(redisBloom.mAdd(key, "张学友", "刘德华", "郭富城", "黎明"));
// 批量判断元素是否存在
System.err.println(redisBloom.mExists(key, "张学友", "刘德华", "郭德纲", "黎明"));
}
http://localhost:8080/test4
Setset5; List list5; @GetMapping("/test5") public void test5(@RequestParam("total") int total, String rate, boolean first) { String key = "y_test5"; if (first) { redisBloom.reserve(key, rate, String.valueOf(total)); set5 = new HashSet<>(1000); list5 = new ArrayList<>(1000); //向布隆过滤器中填充数据 for (int i = 0; i < total; i++) { String uuid = UUID.randomUUID().toString(); if (i < 1000) { set5.add(uuid); list5.add(uuid); } redisBloom.add(key, uuid); } } int number = total / 10; int right = 0; // 布隆过滤器正确次数 int wrong = 0; // 布隆过滤器误判次数 for (int i = 0; i < number; i++) { int index = number / 1000; String str = i % (index) == 0 ? list5.get(i / index) : UUID.randomUUID().toString(); if (redisBloom.exists(key, str)) { if (set5.contains(str)) { right++; } else { wrong++; } } } System.out.println("布隆过滤器容器大小:" + total); System.out.println("模拟数据量:" + number); System.out.println("正确次数:" + right); System.out.println("误判次数:" + wrong); NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMaximumFractionDigits(4); System.out.println("误判率:" + numberFormat.format((float) wrong / (float) number)); }
5.redisson 实现http://localhost:8080/test5?total=100000&rate=0.05&first=true
http://localhost:8080/test5?total=100000&rate=0.05&first=false (2次)
http://localhost:8080/test5?total=1000000&rate=0.05&first=false (3次)
org.redisson redisson-spring-boot-starter 3.15.3
package com.yzm.redis11.config;
import org.redisson.Redisson;
import org.redisson.api.RedissonClient;
import org.redisson.config.Config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class RedissonConfig {
@Bean(name = "singleServer", destroyMethod = "shutdown")
public RedissonClient useSingleServer() {
Config config = new Config();
config.useSingleServer()
.setAddress("redis://192.168.8.128:6379")
.setPassword("1234")
.setDatabase(0);
return Redisson.create(config);
}
}
Setset6; List list6; @GetMapping("/test6") public void test6(@RequestParam("total") int total, String rate, boolean first) { RBloomFilter bloomFilter = redissonClient.getBloomFilter("y_test6"); if (first) { bloomFilter.tryInit(total, Double.parseDouble(rate)); set6 = new HashSet<>(1000); list6 = new ArrayList<>(1000); //向布隆过滤器中填充数据 for (int i = 0; i < total; i++) { String uuid = UUID.randomUUID().toString(); if (i < 1000) { set6.add(uuid); list6.add(uuid); } bloomFilter.add(uuid); } } int number = total / 10; int right = 0; // 布隆过滤器正确次数 int wrong = 0; // 布隆过滤器误判次数 for (int i = 0; i < number; i++) { int index = number / 1000; String str = i % (index) == 0 ? list6.get(i / index) : UUID.randomUUID().toString(); if (bloomFilter.contains(str)) { if (set6.contains(str)) { right++; } else { wrong++; } } } System.out.println("布隆过滤器容器大小:" + total); System.out.println("模拟数据量:" + number); System.out.println("正确次数:" + right); System.out.println("误判次数:" + wrong); NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMaximumFractionDigits(4); System.out.println("误判率:" + numberFormat.format((float) wrong / (float) number)); }
http://localhost:8080/test6?total=100000&rate=0.03&first=true
http://localhost:8080/test6?total=100000&rate=0.03&first=false (2次)
http://localhost:8080/test6?total=1000000&rate=0.03&first=false (3次)
改错误率
http://localhost:8080/test6?total=100000&rate=0.01&first=true
http://localhost:8080/test6?total=100000&rate=0.01&first=false (2次)
http://localhost:8080/test6?total=1000000&rate=0.01&first=false (3次)



