Added a Bloomfilter and fixed some bugs
This commit is contained in:
parent
9e3de28d45
commit
017a27931a
8 changed files with 425 additions and 24 deletions
|
|
@ -5,9 +5,10 @@ import java.io.ByteArrayOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.BitSet;
|
||||
|
||||
public class Converter {
|
||||
|
||||
|
||||
/**
|
||||
* Converts an object to an array of bytes.
|
||||
*
|
||||
|
|
@ -26,7 +27,7 @@ public class Converter {
|
|||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts an array of bytes back to its constituent object. The
|
||||
* input array is assumed to have been created from the original object.
|
||||
|
|
@ -49,12 +50,13 @@ public class Converter {
|
|||
}
|
||||
return object;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the given interface is implemented in the object
|
||||
* @param object The object to look for the interface
|
||||
* @param interf The interface to look for
|
||||
* @return True if the interface is implemented else false
|
||||
*
|
||||
* @param object the object to look for the interface
|
||||
* @param interf the interface to look for
|
||||
* @return true if the interface is implemented else false
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static boolean isInstanceOf(Object object, Class interf){
|
||||
|
|
@ -66,4 +68,85 @@ public class Converter {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// array neaded for byteToHex
|
||||
private static char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
|
||||
/**
|
||||
* Converts a byte Array to a Hex String
|
||||
*
|
||||
* @param raw the byte arrat to convert
|
||||
* @return a Hex String
|
||||
*/
|
||||
public static String toHexString(byte[] raw){
|
||||
StringBuffer ret = new StringBuffer();
|
||||
|
||||
for(byte b : raw){
|
||||
ret.append(HEX_CHARS[(int) b & 0x0F ]);
|
||||
ret.append(HEX_CHARS[(int) (b >>> 0x04)& 0x0F ]);
|
||||
}
|
||||
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the given byte to a String with 1's and 0's
|
||||
*
|
||||
* @param raw the byte to convert
|
||||
* @return a String with 1's and 0's
|
||||
*/
|
||||
public static String toString(byte raw){
|
||||
StringBuffer ret = new StringBuffer();
|
||||
for(int i=128; i>0 ;i=( i<1 ? i=0 : i/2 ) ){
|
||||
ret.append(( (raw & i) == 0 ? '0' : '1'));
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the given byte array to a String with 1's and 0's
|
||||
*
|
||||
* @param raw the byte array to convert
|
||||
* @return a String with 1's and 0's
|
||||
*/
|
||||
public static String toString(byte[] raw){
|
||||
StringBuffer ret = new StringBuffer();
|
||||
for(byte b : raw){
|
||||
for(int i=128; i>0 ;i=( i<1 ? i=0 : i/2 ) ){
|
||||
ret.append(( (b & i) == 0 ? '0' : '1'));
|
||||
}
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a BitSet to a Integer
|
||||
*
|
||||
* @param bits the BitSet to convert
|
||||
* @return a Integer
|
||||
*/
|
||||
public static int toInt(BitSet bits){
|
||||
int ret = 0;
|
||||
|
||||
for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i+1)) {
|
||||
ret += Math.pow(2, i);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a Integer to a BitSet
|
||||
*
|
||||
* @param i the Integer to convert
|
||||
* @return a BitSet object
|
||||
*/
|
||||
public static BitSet toBitSet(int num){
|
||||
BitSet ret = new BitSet();
|
||||
String tmp = Integer.toBinaryString(num);
|
||||
|
||||
for(int i=0; i<tmp.length() ;i++){
|
||||
ret.set(i , tmp.charAt(tmp.length()-i-1) != '0');
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,15 +109,17 @@ public class FileFinder {
|
|||
String[] temp = dir.list();
|
||||
File file;
|
||||
|
||||
for(int i=0; i<temp.length ;i++){
|
||||
file = new File(dir.getPath()+File.separator+temp[i]);
|
||||
if(file.isDirectory()){
|
||||
search(new File(dir.getPath()+File.separator+temp[i]+File.separator),fileList);
|
||||
if(temp != null){
|
||||
for(int i=0; i<temp.length ;i++){
|
||||
file = new File(dir.getPath()+File.separator+temp[i]);
|
||||
if(file.isDirectory()){
|
||||
search(new File(dir.getPath()+File.separator+temp[i]+File.separator),fileList);
|
||||
}
|
||||
else if(file.isFile()){
|
||||
MultiPrintStream.out.println("File Found: "+file);
|
||||
fileList.add(file);
|
||||
}
|
||||
}
|
||||
else if(file.isFile()){
|
||||
MultiPrintStream.out.println("File Found: "+file);
|
||||
fileList.add(file);
|
||||
}
|
||||
}
|
||||
|
||||
return fileList;
|
||||
|
|
|
|||
|
|
@ -9,10 +9,8 @@ import java.math.BigInteger;
|
|||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
import sun.misc.BASE64Encoder;
|
||||
|
||||
public class Hasher {
|
||||
|
||||
|
||||
/**
|
||||
* Returns a hash of a file
|
||||
*
|
||||
|
|
@ -40,16 +38,46 @@ public class Hasher {
|
|||
throw new RuntimeException("Unable to process file for MD5", e);
|
||||
}
|
||||
is.close();
|
||||
|
||||
|
||||
MultiPrintStream.out.println("File Hash: "+output);
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the MD5 hash of the given object
|
||||
*
|
||||
* @param object The object to hash
|
||||
* @return String containing the hash
|
||||
*/
|
||||
public static String MD5(Serializable object){
|
||||
try {
|
||||
return hash(object, "MD5");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the SHA-1 hash of the given object
|
||||
*
|
||||
* @param object The object to hash
|
||||
* @return String containing the hash
|
||||
*/
|
||||
public static String SHA1(Serializable object){
|
||||
try {
|
||||
return hash(object, "SHA-1");
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the hash of the given object
|
||||
*
|
||||
* @param object The object to hash
|
||||
* @param hashType The hash method
|
||||
* @param hashType The hash method (MD2, MD5, SHA-1, SHA-256, SHA-384, SHA-512 )
|
||||
* @return String containing the hash
|
||||
* @throws NoSuchAlgorithmException
|
||||
*/
|
||||
|
|
@ -59,6 +87,61 @@ public class Hasher {
|
|||
md.update(Converter.toBytes(object));
|
||||
|
||||
byte raw[] = md.digest();
|
||||
return (new BASE64Encoder()).encode(raw);
|
||||
return Converter.toHexString(raw);//(new BASE64Encoder()).encode(raw);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* MurmurHash2 ported from cpp source
|
||||
*
|
||||
* @param object The Key
|
||||
* @param seed Seed
|
||||
* @return A MurmurHash of the key
|
||||
*/
|
||||
public static int MurmurHash(Serializable object, int seed){
|
||||
byte[] data = Converter.toBytes(object);
|
||||
int length = data.length;
|
||||
|
||||
//Constants
|
||||
int m = 0x5bd1e995;
|
||||
int r = 24;
|
||||
|
||||
// Initialize the hash to a 'random' value
|
||||
int h = seed ^ length;
|
||||
|
||||
int i=0;
|
||||
for(; i+4<length ;i+=4){
|
||||
// get the first 4 bytes
|
||||
int k = data[i+3] & 0xff;
|
||||
k <<= 8;
|
||||
k |= data[i+2] & 0xff;
|
||||
k <<= 8;
|
||||
k |= data[i+1] & 0xff;
|
||||
k <<= 8;
|
||||
k |= data[i+0] & 0xff;
|
||||
|
||||
k *= m;
|
||||
k ^= k >>> r;
|
||||
k *= m;
|
||||
|
||||
h *= m;
|
||||
h ^= k;
|
||||
}
|
||||
|
||||
// Handle the last few bytes of the input
|
||||
i = length % 4;
|
||||
|
||||
switch(i){
|
||||
case 3: h ^= data[length-3] << 16;
|
||||
case 2: h ^= data[length-2] << 8;
|
||||
case 1: h ^= data[length-1];
|
||||
h *= m;
|
||||
}
|
||||
|
||||
h ^= h >>> 13;
|
||||
h *= m;
|
||||
h ^= h >>> 15;
|
||||
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package zutil.network.nio.service;
|
||||
package zutil.network.nio.service.chat;
|
||||
|
||||
/**
|
||||
* This is a listener class for new chat messages
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package zutil.network.nio.service;
|
||||
package zutil.network.nio.service.chat;
|
||||
|
||||
import java.nio.channels.SocketChannel;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -8,6 +8,7 @@ import zutil.MultiPrintStream;
|
|||
import zutil.network.nio.NioNetwork;
|
||||
import zutil.network.nio.message.ChatMessage;
|
||||
import zutil.network.nio.message.Message;
|
||||
import zutil.network.nio.service.NetworkService;
|
||||
|
||||
public class ChatService extends NetworkService{
|
||||
private HashMap<String,LinkedList<SocketChannel>> rooms;
|
||||
|
|
@ -11,8 +11,8 @@ import zutil.network.nio.message.SyncMessage;
|
|||
import zutil.network.nio.message.type.EchoMessage;
|
||||
import zutil.network.nio.message.type.ResponseRequestMessage;
|
||||
import zutil.network.nio.response.ResponseEvent;
|
||||
import zutil.network.nio.service.ChatService;
|
||||
import zutil.network.nio.service.NetworkService;
|
||||
import zutil.network.nio.service.chat.ChatService;
|
||||
import zutil.network.nio.service.sync.SyncService;
|
||||
|
||||
|
||||
|
|
|
|||
168
src/zutil/struct/BloomFilter.java
Normal file
168
src/zutil/struct/BloomFilter.java
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
package zutil.struct;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import zutil.Hasher;
|
||||
|
||||
/**
|
||||
* A implementation of a bloom filter
|
||||
* @author Ziver
|
||||
*
|
||||
*/
|
||||
public class BloomFilter<T extends Serializable> implements Set<T>, Serializable{
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private BitSet bits;
|
||||
private int content_size;
|
||||
private int optimal_size;
|
||||
private int k;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a bloom filter
|
||||
*
|
||||
* @param size The amount of bits in the filter
|
||||
* @param expected_data_count The estimated amount of data to
|
||||
* be inserted(a bigger number is better than a smaller)
|
||||
*/
|
||||
public BloomFilter(int size, int expected_data_count){
|
||||
bits = new BitSet(size);
|
||||
k = (int)((size/expected_data_count) * Math.log(2));
|
||||
content_size = 0;
|
||||
optimal_size = expected_data_count;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param e A Serializable object
|
||||
* @return If the optimal size has been reached
|
||||
*/
|
||||
public boolean add(T e) {
|
||||
content_size++;
|
||||
int hash = 0;
|
||||
for(int i=0; i<k ;i++){
|
||||
hash = Hasher.MurmurHash(e, hash);
|
||||
hash = Math.abs(hash) % bits.size();
|
||||
bits.set(hash, true);
|
||||
}
|
||||
return isFull();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a collection to the bloom filter
|
||||
*
|
||||
* @return If the optimal size has been reached
|
||||
*/
|
||||
public boolean addAll(Collection<? extends T> c) {
|
||||
for(T t : c){
|
||||
add(t);
|
||||
}
|
||||
return isFull();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return clears the filter
|
||||
*/
|
||||
public void clear() {
|
||||
content_size = 0;
|
||||
bits.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param o The Serializable object to search for
|
||||
* @return If the object contains in the filter or false
|
||||
* if the Object is not Serializable
|
||||
*/
|
||||
public boolean contains(Object o) {
|
||||
if(!(o instanceof Serializable))return false;
|
||||
int hash = 0;
|
||||
for(int i=0; i<k ;i++){
|
||||
hash = Hasher.MurmurHash((Serializable)o, hash);
|
||||
hash = Math.abs(hash) % bits.size();
|
||||
if(!bits.get(hash))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the whole collection contains in the filter
|
||||
*
|
||||
* @param c The collection
|
||||
*/
|
||||
public boolean containsAll(Collection<?> c) {
|
||||
for(Object o : c){
|
||||
if(!contains(o)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return If the bloom filter is empty
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return content_size == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return If the optimal size has been reached
|
||||
*/
|
||||
public boolean isFull() {
|
||||
return content_size > optimal_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The number of data added
|
||||
*/
|
||||
public int size() {
|
||||
return content_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The false posetive probability of the current state of the filter
|
||||
*/
|
||||
public double falsePosetiveProbability(){
|
||||
return Math.pow(0.6185, bits.size()/content_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the hash count. Should be set before adding elements
|
||||
* or the already added elements will be lost
|
||||
*
|
||||
* @param k The hash count
|
||||
*/
|
||||
public void setHashCount(int k){
|
||||
this.k = k;
|
||||
}
|
||||
|
||||
//*********************************************************************
|
||||
//*********************************************************************
|
||||
public Object[] toArray() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@SuppressWarnings("hiding")
|
||||
public <T> T[] toArray(T[] a) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public Iterator<T> iterator() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public boolean remove(Object o) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public boolean removeAll(Collection<?> c) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public boolean retainAll(Collection<?> c) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
64
src/zutil/test/BloomFilterTest.java
Normal file
64
src/zutil/test/BloomFilterTest.java
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
package zutil.test;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
|
||||
import zutil.struct.BloomFilter;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* This code may be used, modified, and redistributed provided that the
|
||||
* author tag below remains intact.
|
||||
*
|
||||
* @author Ian Clarke <ian@uprizer.com>
|
||||
*/
|
||||
|
||||
public class BloomFilterTest extends TestCase {
|
||||
public void testBloomFilter() {
|
||||
DecimalFormat df = new DecimalFormat("0.00000");
|
||||
Random r = new Random(124445l);
|
||||
int bfSize = 400000;
|
||||
System.out.println("Testing " + bfSize + " bit SimpleBloomFilter");
|
||||
for (int i = 5; i < 10; i++) {
|
||||
int addCount = 10000 * (i + 1);
|
||||
BloomFilter<Integer> bf = new BloomFilter<Integer>(bfSize, addCount);
|
||||
HashSet<Integer> added = new HashSet<Integer>();
|
||||
for (int x = 0; x < addCount; x++) {
|
||||
int num = r.nextInt();
|
||||
added.add(num);
|
||||
}
|
||||
bf.addAll(added);
|
||||
assertTrue("Assert that there are no false negatives", bf
|
||||
.containsAll(added));
|
||||
|
||||
int falsePositives = 0;
|
||||
for (int x = 0; x < addCount; x++) {
|
||||
int num = r.nextInt();
|
||||
|
||||
// Ensure that this random number hasn't been added already
|
||||
if (added.contains(num)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If necessary, record a false positive
|
||||
if (bf.contains(num)) {
|
||||
falsePositives++;
|
||||
}
|
||||
}
|
||||
double expectedFP = bf.falsePosetiveProbability();
|
||||
double actualFP = (double) falsePositives / (double) addCount;
|
||||
System.out.println("Got " + falsePositives
|
||||
+ " false positives out of " + addCount + " added items, rate = "
|
||||
+ df.format(actualFP) + ", expected = "
|
||||
+ df.format(expectedFP));
|
||||
double ratio = expectedFP/actualFP;
|
||||
assertTrue(
|
||||
"Assert that the actual false positive rate doesn't deviate by more than 10% from what was predicted",
|
||||
ratio > 0.9 && ratio < 1.1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue