From 017a27931a2eff16a347d150a03d371e80fde435 Mon Sep 17 00:00:00 2001 From: Ziver Koc Date: Sun, 8 Feb 2009 21:32:06 +0000 Subject: [PATCH] Added a Bloomfilter and fixed some bugs --- src/zutil/Converter.java | 95 +++++++++- src/zutil/FileFinder.java | 18 +- src/zutil/Hasher.java | 97 +++++++++- .../nio/service/{ => chat}/ChatListener.java | 2 +- .../nio/service/{ => chat}/ChatService.java | 3 +- .../network/nio/worker/SystemWorker.java | 2 +- src/zutil/struct/BloomFilter.java | 168 ++++++++++++++++++ src/zutil/test/BloomFilterTest.java | 64 +++++++ 8 files changed, 425 insertions(+), 24 deletions(-) rename src/zutil/network/nio/service/{ => chat}/ChatListener.java (76%) rename src/zutil/network/nio/service/{ => chat}/ChatService.java (93%) create mode 100644 src/zutil/struct/BloomFilter.java create mode 100644 src/zutil/test/BloomFilterTest.java diff --git a/src/zutil/Converter.java b/src/zutil/Converter.java index c30d90a..8757e39 100644 --- a/src/zutil/Converter.java +++ b/src/zutil/Converter.java @@ -5,9 +5,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.BitSet; public class Converter { - + /** * Converts an object to an array of bytes. * @@ -26,7 +27,7 @@ public class Converter { } return baos.toByteArray(); } - + /** * Converts an array of bytes back to its constituent object. The * input array is assumed to have been created from the original object. @@ -49,12 +50,13 @@ public class Converter { } return object; } - + /** * Checks if the given interface is implemented in the object - * @param object The object to look for the interface - * @param interf The interface to look for - * @return True if the interface is implemented else false + * + * @param object the object to look for the interface + * @param interf the interface to look for + * @return true if the interface is implemented else false */ @SuppressWarnings("unchecked") public static boolean isInstanceOf(Object object, Class interf){ @@ -66,4 +68,85 @@ public class Converter { } return false; } + + // array neaded for byteToHex + private static char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + /** + * Converts a byte Array to a Hex String + * + * @param raw the byte arrat to convert + * @return a Hex String + */ + public static String toHexString(byte[] raw){ + StringBuffer ret = new StringBuffer(); + + for(byte b : raw){ + ret.append(HEX_CHARS[(int) b & 0x0F ]); + ret.append(HEX_CHARS[(int) (b >>> 0x04)& 0x0F ]); + } + + return ret.toString(); + } + + /** + * Converts the given byte to a String with 1's and 0's + * + * @param raw the byte to convert + * @return a String with 1's and 0's + */ + public static String toString(byte raw){ + StringBuffer ret = new StringBuffer(); + for(int i=128; i>0 ;i=( i<1 ? i=0 : i/2 ) ){ + ret.append(( (raw & i) == 0 ? '0' : '1')); + } + return ret.toString(); + } + + /** + * Converts the given byte array to a String with 1's and 0's + * + * @param raw the byte array to convert + * @return a String with 1's and 0's + */ + public static String toString(byte[] raw){ + StringBuffer ret = new StringBuffer(); + for(byte b : raw){ + for(int i=128; i>0 ;i=( i<1 ? i=0 : i/2 ) ){ + ret.append(( (b & i) == 0 ? '0' : '1')); + } + } + return ret.toString(); + } + + /** + * Converts a BitSet to a Integer + * + * @param bits the BitSet to convert + * @return a Integer + */ + public static int toInt(BitSet bits){ + int ret = 0; + + for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i+1)) { + ret += Math.pow(2, i); + } + + return ret; + } + + /** + * Converts a Integer to a BitSet + * + * @param i the Integer to convert + * @return a BitSet object + */ + public static BitSet toBitSet(int num){ + BitSet ret = new BitSet(); + String tmp = Integer.toBinaryString(num); + + for(int i=0; i>> r; + k *= m; + + h *= m; + h ^= k; + } + + // Handle the last few bytes of the input + i = length % 4; + + switch(i){ + case 3: h ^= data[length-3] << 16; + case 2: h ^= data[length-2] << 8; + case 1: h ^= data[length-1]; + h *= m; + } + + h ^= h >>> 13; + h *= m; + h ^= h >>> 15; + + return h; + } +} \ No newline at end of file diff --git a/src/zutil/network/nio/service/ChatListener.java b/src/zutil/network/nio/service/chat/ChatListener.java similarity index 76% rename from src/zutil/network/nio/service/ChatListener.java rename to src/zutil/network/nio/service/chat/ChatListener.java index e7ca05b..6f07635 100644 --- a/src/zutil/network/nio/service/ChatListener.java +++ b/src/zutil/network/nio/service/chat/ChatListener.java @@ -1,4 +1,4 @@ -package zutil.network.nio.service; +package zutil.network.nio.service.chat; /** * Tis is a listener class for new chat messages diff --git a/src/zutil/network/nio/service/ChatService.java b/src/zutil/network/nio/service/chat/ChatService.java similarity index 93% rename from src/zutil/network/nio/service/ChatService.java rename to src/zutil/network/nio/service/chat/ChatService.java index a4fbdae..2c77f62 100644 --- a/src/zutil/network/nio/service/ChatService.java +++ b/src/zutil/network/nio/service/chat/ChatService.java @@ -1,4 +1,4 @@ -package zutil.network.nio.service; +package zutil.network.nio.service.chat; import java.nio.channels.SocketChannel; import java.util.HashMap; @@ -8,6 +8,7 @@ import zutil.MultiPrintStream; import zutil.network.nio.NioNetwork; import zutil.network.nio.message.ChatMessage; import zutil.network.nio.message.Message; +import zutil.network.nio.service.NetworkService; public class ChatService extends NetworkService{ private HashMap> rooms; diff --git a/src/zutil/network/nio/worker/SystemWorker.java b/src/zutil/network/nio/worker/SystemWorker.java index 51ef1e6..f1b81b7 100644 --- a/src/zutil/network/nio/worker/SystemWorker.java +++ b/src/zutil/network/nio/worker/SystemWorker.java @@ -11,8 +11,8 @@ import zutil.network.nio.message.SyncMessage; import zutil.network.nio.message.type.EchoMessage; import zutil.network.nio.message.type.ResponseRequestMessage; import zutil.network.nio.response.ResponseEvent; -import zutil.network.nio.service.ChatService; import zutil.network.nio.service.NetworkService; +import zutil.network.nio.service.chat.ChatService; import zutil.network.nio.service.sync.SyncService; diff --git a/src/zutil/struct/BloomFilter.java b/src/zutil/struct/BloomFilter.java new file mode 100644 index 0000000..2978d3e --- /dev/null +++ b/src/zutil/struct/BloomFilter.java @@ -0,0 +1,168 @@ +package zutil.struct; + +import java.io.Serializable; +import java.util.BitSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; + +import zutil.Hasher; + +/** + * A implementation of a bloom filter + * @author Ziver + * + */ +public class BloomFilter implements Set, Serializable{ + private static final long serialVersionUID = 1L; + + private BitSet bits; + private int content_size; + private int optimal_size; + private int k; + + + /** + * Creates a bloom filter + * + * @param size The amount of bits in the filter + * @param expected_data_count The estimated amount of data to + * be inserted(a bigger number is better than a smaller) + */ + public BloomFilter(int size, int expected_data_count){ + bits = new BitSet(size); + k = (int)((size/expected_data_count) * Math.log(2)); + content_size = 0; + optimal_size = expected_data_count; + } + + /** + * @param e A Serializable object + * @return If the optimal size has been reached + */ + public boolean add(T e) { + content_size++; + int hash = 0; + for(int i=0; i c) { + for(T t : c){ + add(t); + } + return isFull(); + } + + /** + * @return clears the filter + */ + public void clear() { + content_size = 0; + bits.clear(); + } + + /** + * @param o The Serializable object to search for + * @return If the object contains in the filter or false + * if the Object is not Serializable + */ + public boolean contains(Object o) { + if(!(o instanceof Serializable))return false; + int hash = 0; + for(int i=0; i c) { + for(Object o : c){ + if(!contains(o)) return false; + } + return true; + } + + /** + * @return If the bloom filter is empty + */ + public boolean isEmpty() { + return content_size == 0; + } + + /** + * @return If the optimal size has been reached + */ + public boolean isFull() { + return content_size > optimal_size; + } + + /** + * @return The number of data added + */ + public int size() { + return content_size; + } + + /** + * @return The false posetive probability of the current state of the filter + */ + public double falsePosetiveProbability(){ + return Math.pow(0.6185, bits.size()/content_size); + } + + /** + * Set the hash count. Should be set before adding elements + * or the already added elements will be lost + * + * @param k The hash count + */ + public void setHashCount(int k){ + this.k = k; + } + + //********************************************************************* + //********************************************************************* + public Object[] toArray() { + throw new UnsupportedOperationException(); + } + + @SuppressWarnings("hiding") + public T[] toArray(T[] a) { + throw new UnsupportedOperationException(); + } + + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + public boolean remove(Object o) { + throw new UnsupportedOperationException(); + } + + public boolean removeAll(Collection c) { + throw new UnsupportedOperationException(); + } + + public boolean retainAll(Collection c) { + throw new UnsupportedOperationException(); + } +} diff --git a/src/zutil/test/BloomFilterTest.java b/src/zutil/test/BloomFilterTest.java new file mode 100644 index 0000000..5939cf2 --- /dev/null +++ b/src/zutil/test/BloomFilterTest.java @@ -0,0 +1,64 @@ +package zutil.test; + +import java.text.DecimalFormat; +import java.util.HashSet; +import java.util.Random; + +import zutil.struct.BloomFilter; + +import junit.framework.TestCase; + +/** + * This code may be used, modified, and redistributed provided that the + * author tag below remains intact. + * + * @author Ian Clarke + */ + +public class BloomFilterTest extends TestCase { + public void testBloomFilter() { + DecimalFormat df = new DecimalFormat("0.00000"); + Random r = new Random(124445l); + int bfSize = 400000; + System.out.println("Testing " + bfSize + " bit SimpleBloomFilter"); + for (int i = 5; i < 10; i++) { + int addCount = 10000 * (i + 1); + BloomFilter bf = new BloomFilter(bfSize, addCount); + HashSet added = new HashSet(); + for (int x = 0; x < addCount; x++) { + int num = r.nextInt(); + added.add(num); + } + bf.addAll(added); + assertTrue("Assert that there are no false negatives", bf + .containsAll(added)); + + int falsePositives = 0; + for (int x = 0; x < addCount; x++) { + int num = r.nextInt(); + + // Ensure that this random number hasn't been added already + if (added.contains(num)) { + continue; + } + + // If necessary, record a false positive + if (bf.contains(num)) { + falsePositives++; + } + } + double expectedFP = bf.falsePosetiveProbability(); + double actualFP = (double) falsePositives / (double) addCount; + System.out.println("Got " + falsePositives + + " false positives out of " + addCount + " added items, rate = " + + df.format(actualFP) + ", expected = " + + df.format(expectedFP)); + double ratio = expectedFP/actualFP; + assertTrue( + "Assert that the actual false positive rate doesn't deviate by more than 10% from what was predicted", + ratio > 0.9 && ratio < 1.1); + } + } + + +}