手写一个简单的HashMap

代码实现

package com.thx;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

public class MyHashMap<K, V> {
    private static final int DEFAULT_CAPACITY = 16;
    private static final float DEFAULT_LOAD_FACTOR = 0.75f;
    private List<LinkedList<Node<K, V>>> buckets;
    private int size;
    private float loadFactor;
    private static class Node<K, V> {
        public K key;
        public V value;
        public Node(K key, V value) {
            this.key = key;
            this.value = value;
        }
        public Node() {}

    }

    public MyHashMap() {
        this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR);
    }

    public MyHashMap(int capacity) {
        this(capacity, DEFAULT_LOAD_FACTOR);
    }

    public MyHashMap(int capacity, float loadFactor) {
        this.loadFactor = loadFactor;
        buckets = new ArrayList<>(capacity);
        for (int i = 0; i < capacity; i++) {
            buckets.add(new LinkedList<>());
        }
    }

    public int size() {
        return this.size;
    }

    // 哈希映射函数
    private int calHash(K key) {
        // 为了防止出现负数的哈希码，再取一下绝对值
        return Math.abs(key.hashCode() % buckets.size());
    }

    public V get(K key) {
        int index = calHash(key);
        LinkedList<Node<K, V>> bucket = buckets.get(index);
        Node<K, V> node = this.getNode(bucket, key);
        return node == null ? null : node.value;
    }

    private Node<K, V> getNode(LinkedList<Node<K, V>> bucket, K key){
        for (Node<K, V> node : bucket) {
            if (node.key.equals(key)) {
                return node;
            }
        }
        return null;
    }

    public void put(K key, V value) {
        int index = calHash(key);
        LinkedList<Node<K, V>> bucket = buckets.get(index);
        Node<K, V> node = this.getNode(bucket, key);
        if (node == null) {
            bucket.add(new Node<>(key, value));
            size++;
        } else {
            // 更新
            node.value = value;
        }
        // 扩容
        if ((double) size / buckets.size() > loadFactor) {
            rehash();
        }
    }

    // 两倍扩容
    private void rehash() {
        int newCapacity = buckets.size()*2;
        List<LinkedList<Node<K, V>>> newBuckets = new ArrayList<>(newCapacity);
        for (int i = 0; i < newCapacity; i++) {
            newBuckets.add(new LinkedList<>());
        }
        for (LinkedList<Node<K, V>> bucket : buckets) {
            for (Node<K, V> node : bucket) {
                int index = Math.abs(node.key.hashCode() % newCapacity);
                newBuckets.get(index).add(node);
            }
        }
        this.buckets = newBuckets;
    }

    public void remove(K key) {
        int index = this.calHash(key);
        LinkedList<Node<K, V>> bucket = buckets.get(index);
        Node<K, V> node = this.getNode(bucket, key);
        if (node != null) {
            bucket.remove(node);
            size--;
        }
    }

    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        for (LinkedList<Node<K, V>> bucket : buckets) {
            for (Node<K, V> node : bucket) {
                sb.append("("+node.key.toString()+","+node.value.toString()+"),");
            }
        }
        sb.setCharAt(sb.length()-1, '}');
        return sb.toString();
    }

    public boolean containsKey(K key) {
        int index = this.calHash(key);
        LinkedList<Node<K, V>> bucket = buckets.get(index);
        return this.getNode(bucket, key) != null;
    }
}

代码测试

package com.thx;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.LinkedList;

public class MyHashMapTest {
    public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException {
        MyHashMap<String, Integer> map = new MyHashMap<String, Integer>();
        map.put("apple", 2);
        map.put("banana", 3);
        map.put("orange", 5);
        map.put("apple", 6);
        map.remove("banana");
        System.out.println(map);
        System.out.println(map.size());
        for (int i = 0; i < 20; i++) {
            map.put("apple_"+i, i);
        }
        System.out.println(map);
        System.out.println(map.size());
        Field buckets = map.getClass().getDeclaredField("buckets");
        buckets.setAccessible(true);
        ArrayList o = (ArrayList) buckets.get(map);
        System.out.println(o.size());
        System.out.println(map.containsKey("apple"));
        System.out.println(map.containsKey("banana"));
    }
}

测试结果

{(orange,5),(apple,6)}
2
{(apple_10,10),(apple_11,11),(apple_12,12),(apple_13,13),(apple_14,14),(apple_15,15),(apple_16,16),(apple_17,17),(apple_9,9),(apple_18,18),(apple_8,8),(apple_19,19),(apple_7,7),(apple_6,6),(apple_5,5),(apple_4,4),(orange,5),(apple_3,3),(apple_2,2),(apple_1,1),(apple_0,0),(apple,6)}
22
32
true
false

进程已结束,退出代码0

★还可以改进的地方

数据结构还可以再灵活一点

可以不用Java自带的LinkedList，而是定义Entry<K, V>这样一个数据结构，并且用Node、TreeNode去继承Entry<K, V>，从而实现程序的可扩展性。在源码中，Entry 是一个接口，用于表示键值对，Node 是一个单链表节点的内部类，用于在 HashMap 的桶中存储键值对，TreeNode 是一个红黑树节点的内部类，用于在 HashMap 的桶中存储键值对。

当链表长度超过阈值时进行树化

还是接着上面那点，如果哈希冲突过多，就会影响访问效率，因此将链表转为红黑树是合理的。在HashMap源码中，首先会判断table的长度是否大于64，如果小于64，就通过扩容的方式来解决，避免红黑树结构化。

以下是它的getNode方法的源码，在遍历的时候根据first instanceof TreeNode来判断当前是链表还是红黑树。

//如果key和第一个结点不匹配，则看.next是否为空，不为null则继续，为空则返回null
if ((e = first.next) != null) {
    //如果此时是红黑树的结构，则进行处理getTreeNode()方法搜索key
    if (first instanceof TreeNode)
        return ((TreeNode<K,V>)first).getTreeNode(hash, key);
    //是链表结构的话就一个一个遍历，直到找到key对应的结点，
    //或者e的下一个结点为null退出循环
    do {
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k))))
            return e;
    } while ((e = e.next) != null);
}

当然了，如果嫌手写红黑树麻烦的话，那就用TreeMap吧！

哈希算法还可以再灵活一点

我们可以参考一下源码是怎么实现的。

哈希值的计算

在源码中，会使用”扰动函数” 的算法对初始哈希值进行变换，以消除低位的取模运算时丢掉高位所引起的误差。

// 哈希值计算（计算好以后赋值给Node中的变量进行保存，防止重复计算）
static final int hash(Object key) {
    int h;
    // 原来的hashCode无符号右移16位后和自身进行异或
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}

异或运算能更好的保留各部分的特征，如果采用&运算计算出来的值会向0靠拢，采用|运算计算出来的值会向1靠拢。

定位函数

1	(n - 1) & hash

本质上还是取余的操作。只不过n始终是2的幂，所以每一种情况出现的可能性都是相等的。

迭代器遍历

迭代器模式是设计模式的一种，它可以对外提供统一的访问方式，而不用关心底层的实现。在HashMap源码中，定义了Iterator这个接口类型的成员变量，在HashMap中进行了实现，名字叫做HashIterator，是一个抽象类。EntryIterator、KeyIterator、ValueIterator是它的子类，对外提供三种不同的访问方式。HashIterator的源码如下，它虽然没有直接继承Iterator，但还是直接重写了Iterator中的next和hasNext方法。另外，还有modCount这种快速失败机制，之前在将List的时候说过，在这里不再赘述。

// HashIterator是写在HashMap里面的，是一个抽象的内部类
abstract class HashIterator {
       Node<K,V> next;        // next entry to return
       Node<K,V> current;     // current entry
       int expectedModCount;  // for fast-fail
       int index;             // current slot

       HashIterator() {
           expectedModCount = modCount;
           Node<K,V>[] t = table;
           current = next = null;
           index = 0;
           if (t != null && size > 0) { // advance to first entry
               do {} while (index < t.length && (next = t[index++]) == null);
           }
       }

       public final boolean hasNext() {
           return next != null;
       }

       final Node<K,V> nextNode() {
           Node<K,V>[] t;
           Node<K,V> e = next;
           if (modCount != expectedModCount)
               throw new ConcurrentModificationException();
           if (e == null)
               throw new NoSuchElementException();
           if ((next = (current = e).next) == null && (t = table) != null) {
               do {} while (index < t.length && (next = t[index++]) == null);
           }
           return e;
       }

       public final void remove() {
           Node<K,V> p = current;
           if (p == null)
               throw new IllegalStateException();
           if (modCount != expectedModCount)
               throw new ConcurrentModificationException();
           current = null;
           removeNode(p.hash, p.key, null, false, false);
           expectedModCount = modCount;
       }
   }

// EntryIterator、KeyIterator、ValueIterator同样是HashMap里面的成员变量
final class KeyIterator extends HashIterator
       implements Iterator<K> {
       public final K next() { return nextNode().key; }
   }

   final class ValueIterator extends HashIterator
       implements Iterator<V> {
       public final V next() { return nextNode().value; }
   }

   final class EntryIterator extends HashIterator
       implements Iterator<Map.Entry<K,V>> {
       public final Map.Entry<K,V> next() { return nextNode(); }
   }

使用设计模式的思想来优化程序的扩展性

我们可以从源码的实现来学习到如下的软件设计原则：

依赖倒置原则

正如上面提到的，Map是一种映射的关系，实现有很多，所以大部分情况下都是依赖的是抽象的Map接口，而不是HashMap这样的实现类，体现的了依赖倒置的原则。再比如迭代器的访问都是统一的，大多数情况下我们都直接使用统一的访问方式，所以依赖的是抽象的Iterator接口而不是像EntryIterator这样具体的接口。还有，哈希桶依赖的是Node这样抽象的父类，可以使得 HashMap 可以灵活地调整内部实现。