using System;
using System.Collections;
using NewLife.Reflection;
namespace NewLife.Collections
{
/// <summary>布隆过滤器</summary>
/// <remarks>
/// 以极小内存进行海量键值的存在判断,碰撞几率很小。
/// 依赖于Murmur128哈希算法。
/// </remarks>
public class BloomFilter
{
#region 构造
private readonly BitArray container = null;
private readonly Int32 _K;
/// <summary>位数组大小</summary>
public Int32 Length { get; }
/// <summary>K值,循环哈希次数</summary>
public Int32 K => _K;
/// <summary>实例化布隆过滤器</summary>
/// <param name="length">位数组大小。建议为预估数据量的32倍,可得到0.004%的误判率</param>
public BloomFilter(Int32 length)
{
container = new BitArray(length);
Length = length;
_K = 4;
}
/// <summary>实例化布隆过滤器</summary>
/// <param name="n">预估数据量</param>
/// <param name="fpp">期望的误判率。小于1</param>
public BloomFilter(Int64 n, Double fpp)
{
if (fpp is <= 0 or >= 1) fpp = 0.0001;
// 根据Guava算法计算位数组大小
Length = (Int32)(-n * Math.Log(fpp) / (Math.Log(2) * Math.Log(2)));
_K = Math.Max(1, (Int32)Math.Round(Length / n * Math.Log(2)));
container = new BitArray(Length);
}
/// <summary>初始化布隆过滤器</summary>
/// <param name="values"></param>
public BloomFilter(Byte[] values)
{
container = new BitArray(values);
Length = container.Length;
}
#endregion
#region 方法
/// <summary>设置指定键进入集合</summary>
/// <param name="key"></param>
public void Set(String key)
{
var buf = key.GetBytes().Murmur128();
var hash1 = buf.ToUInt64();
var hash2 = buf.ToUInt64(8);
var h = hash1;
for (var i = 0; i < _K; i++)
{
container[(Int32)((Int64)(h & Int64.MaxValue) % Length)] = true;
h += hash2;
}
}
/// <summary>判断指定键是否存在于集合中</summary>
/// <param name="key"></param>
/// <returns></returns>
public Boolean Get(String key)
{
var buf = key.GetBytes().Murmur128();
var hash1 = buf.ToUInt64();
var hash2 = buf.ToUInt64(8);
var h = hash1;
for (var i = 0; i < _K; i++)
{
if (!container[(Int32)((Int64)(h & Int64.MaxValue) % Length)]) return false;
h += hash2;
}
return true;
}
/// <summary>获取内部字符串</summary>
/// <returns></returns>
public Byte[] GetBytes()
{
var arr = container.GetValue("m_array") as Int32[];
var buf = new Byte[arr.Length * 32];
for (var i = 0; i < arr.Length; i++)
{
buf.Write((UInt32)arr[i], K << 5);
}
return buf;
}
/// <summary>获取内部字符串</summary>
/// <returns></returns>
public String GetString() => GetBytes().ToBase64();
#endregion
#region 哈希函数
//Int32 Hash1(String key)
//{
// var hash = 5381;
// var ks = key.ToCharArray();
// var count = ks.Length;
// while (count > 0)
// {
// hash += (hash << 5) + (ks[ks.Length - count]);
// count--;
// }
// return (hash & 0x7FFFFFFF) % container.Length;
//}
//Int32 Hash2(String key)
//{
// var seed = 131; // 31 131 1313 13131 131313 etc..
// var hash = 0;
// var ks = (key + "key2").ToCharArray();
// var count = ks.Length;
// while (count > 0)
// {
// hash = hash * seed + (ks[ks.Length - count]);
// count--;
// }
// return (hash & 0x7FFFFFFF) % container.Length;
//}
//Int32 Hash3(String key)
//{
// var hash = 0;
// Int32 i;
// var ks = (key + "keykey3").ToCharArray();
// var count = ks.Length;
// for (i = 0; i < count; i++)
// {
// if ((i & 1) == 0)
// hash ^= ((hash << 7) ^ (ks[i]) ^ (hash >> 3));
// else
// hash ^= (~((hash << 11) ^ (ks[i]) ^ (hash >> 5)));
// count--;
// }
// return (hash & 0x7FFFFFFF) % container.Length;
//}
//Int32 Hash4(String key)
//{
// var hash = 5381;
// var ks = (key + "keykeyke4").ToCharArray();
// var count = ks.Length;
// while (count > 0)
// {
// hash += (hash << 5) + (ks[ks.Length - count]);
// count--;
// }
// return (hash & 0x7FFFFFFF) % container.Length;
//}
#endregion
}
}
|