Implementation and Analysis of Hash Tables and Collision Resolution

Verified

Added on  2020/06/06

|7
|1158
|304
Homework Assignment
AI Summary
This assignment solution covers the implementation and analysis of hash tables and collision resolution techniques. The solution includes several tasks: Task 1 involves testing the basic functionalities of a hash table class, including setting, getting, and checking for the presence of items. Task 2 presents table size analysis. Task 3 explores collision counts with a custom class and hash function. Task 4 compares linear and quadratic probing for collision resolution, providing collision counts. Task 5 implements a chained hash table, demonstrating insertion. Task 6 focuses on quadratic probing with a custom hash function and collision counting. Finally, Task 7 calculates word frequencies in text files within a directory, demonstrating practical application. The solution is implemented in Python.
Document Page
Task1:
from task1 import Hash_Table
def testlen(): #Correct test
lengthtest = Hash_Table()
lengthtest.__setitem__("favNumber", "5")
length = lengthtest.__len__()
assert length == 1, "Oops! Wrong number!"
def testGetItem():
test = Hash_Table()
test.__setitem__("bf", "PM")
bfTest = test.__getitem__("bf")
assert bfTest == "PM", "Get item Non-functional"
def testSetItem(): #Correct test
setItemTest = Hash_Table()
setItemTest.__setitem__("favNumber", "Five") #You need to set the item first
favNumber = setItemTest.__getitem__("favNumber")
assert favNumber == "Five", "Set item Non-functional!"
def testcontains():
test = Hash_Table()
test.__setitem__("favSubject", "Maths")
subjectTest = test.__contains__("favSubject")
assert subjectTest == False, "incorrect input __contains__"
print(myHash)
if __name__ == '__main__':
testSetItem()
testlen()
#testcontains()
testGetItem()
print("All tests were passed !")
tabler-icon-diamond-filled.svg

Paraphrase This Document

Need a fresh take? Get an instant paraphrase of this document with our AI Paraphraser
Document Page
Task2:
Table Size Wall time Comments
210000
209987
400000
399989
202361
Task 3:
n = 1500
global collision_count
collision_count = 0
class Foo():
def __eq__(self, other):
global collision_count
collision_count += 1
return id(self) == id(other)
def __hash__(self):
#return id(self) # @John Machin: yes, I know!
return 1
objects = [Foo() for i in xrange(n)]
d = {}
for o in objects:
d[o] = 1
print collision_count
Document Page
Task 4
import random
import math
def linear_probe(random_list, m):
hash_table = [None]*m
count = 0
for i in random_list:
stop = False
hash = i % m
while not stop:
if hash_table[hash] == None:
hash_table[hash] = i
stop = True
else:
hash = (hash+1) % m
count +=1
return count
def quadratic_probe(random_list, m):
hash_table = [None]*m
count = 0
for i in random_list:
j = 1
stop = False
hash = i % m
while not stop:
if hash_table[hash] == None:
hash_table[hash] = i
stop = True
else:
hash = (hash+j) % m
count +=1
j = int((math.sqrt(j)+1) ** 2)
return count
def comp_lists(list1, list2):
Document Page
def comp_elem(elem1, elem2):
return 1 if elem1 < elem2 else 2
return map(comp_elem, list1, list2)
m = 1009
random_list=random.sample(range(10000), 1000)
count1 = linear_probe(random_list, m)
count2 = quadratic_probe(random_list, m)
print('linear probe collisions:',count1)
print('quadratic probe collisions:',count2)
Task 5
class MyChainHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [ ]
for i in range(self.capacity):
self.slots.append([])
def __str__(self):
info = ""
for items in self.slots:
info += str(items)
return info
def __len__(self):
count = 0
for i in self.slots:
count += len(i)
return count
def hash_function(self, key):
i = key % self.capacity
return i
tabler-icon-diamond-filled.svg

Paraphrase This Document

Need a fresh take? Get an instant paraphrase of this document with our AI Paraphraser
Document Page
def insert(self, key):
self.slots[self.hash_function(key)].append(key)
x = MyChainHashTable(2)
print("Add 3, return:", x.insert(3))
print("Add 3, return:", x.insert(3))
print("Hashtable:", x)
Task 6
def __init__(self, size=200):
self.size = size
self.slots = [None] * self.size
def hash_function(self, key):
s = str(key)
n_hash = 0
for obj in s:
if obj.isdigit():
n_hash = (n_hash << 5) + n_hash + ord(obj)
return n_hash % self.size
def quad_prob(self, oldhash, iter):
n_hash = 0
n_hash = (oldhash + iter**2) % self.size
return n_hash
def put(self, key):
collis_count = 0
hashval = self.hash_function(key)
if self.slots[hashval] == None:
self.slots[hashval] = key
else:
if self.slots[hashval] == key:
pass
else:
iter_count = 1
first_hash = hashval
nextslot = self.quad_prob(first_hash, iter_count)
# looking for a key or any empty slot
Document Page
while self.slots[nextslot] != None and self.slots[nextslot] != key and iter_count <= self.size:
iter_count += 1
nextslot = self.quad_prob(first_hash, iter_count)
collis_count = iter_count
if self.slots[nextslot] == None:
self.slots[nextslot] = key
return collis_count
Task7
import os
from collections import Counter
import glob
def word_frequency(fileobj, words):
"""Build a Counter of specified words in fileobj"""
# initialise the counter to 0 for each word
ct = Counter(dict((w, 0) for w in words))
file_words = (word for line in fileobj for word in line.split())
filtered_words = (word for word in file_words if word in words)
return Counter(filtered_words)
def count_words_in_dir(dirpath, words, action=None):
"""For each .txt file in a dir, count the specified words"""
for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
with open(filepath) as f:
ct = word_frequency(f, words)
if action:
action(filepath, ct)
def print_summary(filepath, ct):
words = sorted(ct.keys())
counts = [str(ct[k]) for k in words]
print('{0}\n{1}\n{2}\n\n'.format(
filepath,
Document Page
', '.join(words),
', '.join(counts)))
words = set(['inflation', 'jobs', 'output'])
count_words_in_dir('./', words, action=print_summary)
chevron_up_icon
1 out of 7
circle_padding
hide_on_mobile
zoom_out_icon
[object Object]