写在最前

分析基于JDK1.7

1. 基础

默认大小为16
默认加载因子为0.75
用Entry数组实现，Entry有key, value, next, hash三个字段。
HashMap的equals比较的都是key和value的equals方法
最大容量2的30次方.
threshold = capacity * loadFactor （容量x加载因子）

2. HashMap源码分析过程

2.1 初始化table数组

/**
 * Inflates the table.
 */
private void inflateTable(int toSize) {
    // Find a power of 2 >= toSize
    int capacity = roundUpToPowerOf2(toSize);
    // threshold为允许的最大容量和设置容量*加载因子的数值较小者
    threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
    table = new Entry[capacity];
    initHashSeedAsNeeded(capacity);
}

table数组为：new Entry[xxx]

2.2 获取大于或等于number的最小2次幂

private static int roundUpToPowerOf2(int number) {
    // assert number >= 0 : "number must be non-negative";
    return number >= MAXIMUM_CAPACITY
            ? MAXIMUM_CAPACITY
            : (number > 1) ? Integer.highestOneBit((number - 1) << 1) : 1;
}

说明：Integer.highestOneBit((number - 1) << 1)中的number-1为必须的。如果不减一，原本已经是2的次幂的将扩大一倍。如：4 -> 8，但实际是如果number已经为4了，我们想要的还是4本身。

2.3 延迟初始化hash种子

根据需要初始化hash种子

final boolean initHashSeedAsNeeded(int capacity) {
    boolean currentAltHashing = hashSeed != 0;
    boolean useAltHashing = sun.misc.VM.isBooted() &&
            (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD);
    boolean switching = currentAltHashing ^ useAltHashing;
    if (switching) {
        hashSeed = useAltHashing
            ? sun.misc.Hashing.randomHashSeed(this)
            : 0;
    }
    return switching;
}

我们看看hashSeed变量是如何定义的：

    /**
 * A randomizing value associated with this instance that is applied to
 * hash code of keys to make hash collisions harder to find. If 0 then
 * alternative hashing is disabled.
 */
 
/**
 * hashseed是一个绑定在hashmap实例上的一个随机值，这个随机值是应用于hashmap
 * 的key上的，目的是使hash碰撞更加困难。如果hashseed为0，一个可选择的hash 
 * 算法是被禁用的。
 */
transient int hashSeed = 0;

2.4 Key为null时

put方法的一部分

public V put(K key, V value) {
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    // key可以为空
    if (key == null)
        // key为null时的处理
        return putForNullKey(value);
....
}

putForNullKey方法

private V putForNullKey(V value) {
    // 遍历table数组，寻找key=null
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null) {
            V oldValue = e.value;
            // 用新value替换旧value
            e.value = value;
            e.recordAccess(this);
            // 返回旧值
            return oldValue;
        }
    }
    // 记录修改次数
    modCount++;
    addEntry(0, null, value, 0);
    return null;
}

其中，调用了addEntry(0, null, value, 0)方法，我们看看addEntry方法

addEntry方法

// hash, key,value, bucketIndex四个参数
// 根据上面的调用，我们知道这里的：hash=0; key=null; value=xxx; bucketIndex=0
void addEntry(int hash, K key, V value, int bucketIndex) {
// 1. 判断当前大小size和阈值threshold的大小，并且判断table[0]是否为null
// 2. 如果满足1的全部条件，则进行hashmap的resize，扩容2*table.length
if ((size >= threshold) && (null != table[bucketIndex])) {
    // 扩容
    resize(2 * table.length);
    // 计算key的hash code
    hash = (null != key) ? hash(key) : 0;
    // 计算桶的位置
    bucketIndex = indexFor(hash, table.length);
}
// 创建Entry
createEntry(hash, key, value, bucketIndex);
}

resize方法

void resize(int newCapacity) {
    Entry[] oldTable = table;
    int oldCapacity = oldTable.length;
    // 如果旧容量已经等于允许的最大容量了，就设置threshold为最大容量，然后结束，不进行后续的扩容了
    if (oldCapacity == MAXIMUM_CAPACITY) {
        threshold = Integer.MAX_VALUE;
        return;
    }

    // 申请新的Entry数组，初始大小为新容量，也就是2*table.length
    Entry[] newTable = new Entry[newCapacity];
    // 根据新的容量值newCapacity，生成一个新的随机hash种子，然后执行`transfer`方法
    transfer(newTable, initHashSeedAsNeeded(newCapacity));
    // 把table变量指向新申请的table地址
    table = newTable;
    // 重新计算下threshold
    threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
}

上面的方法中，比较重要的是transfer方法。

transfer方法

// 核心: 拷贝table中的entry到新的table中
// 整个方法分两层循环
// 第一层for循环主要用来遍历数组；第二层while是用来遍历Entry组成的单向链表
void transfer(Entry[] newTable, boolean rehash) {
    int newCapacity = newTable.length;
    // 遍历数组
    for (Entry<K,V> e : table) {
        // 遍历链表
        while(null != e) {
            // 保留当前e的next entry，可能为null，此时没有形成链表，也就是没有hash冲突
            Entry<K,V> next = e.next;
            // 是否需要rehash，也就是initHashSeedAsNeeded方法的返回值
            // 这里是false
            if (rehash) {
                // 如果key为null，则hash code =0；否则使用hash方法计算hash code
                e.hash = null == e.key ? 0 : hash(e.key);
            }
            // 根据新table的容量和当前entry的hash值，计算entry应该插入新table的位置
            int i = indexFor(e.hash, newCapacity);
            // step1: e的next引用指向newTable的索引为i的位置，此时newTable[i]为null
            e.next = newTable[i];
            // step2: 给newTable索引为i处赋值；也就是e.next=e
            newTable[i] = e;
            // step3:  然后把next元素赋值给当前e
            e = next;
            
            // 经过上面的step1,step2, 链表发生了反向逆转,原来的A->B 变为B->A
            // step3的作用是把当前e设置为B,进行下一次的循环。
            // 最终的结果是：原本的链表A->B->C->D变成了D->C->B->A
        }
    }
}

transfer方法中比较中要的就是：hash()方法和indexFor方法。hash()方法这里暂时没有用到，我们先跳过。先看indexFor方法。

indexFor方法

// h： key的hash code; length：Entry[]数组的大小
static int indexFor(int h, int length) {
    // 通过bitCount可以计算出，i的二进制表示中有多个1。
    // 如果二进制位中只有一个1，说明该int值是2的次幂。
    // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2";
    
    // length-1表示索引下标的最大值
    // length-1换成二进制永远是全部为1，比如容量为16，则length-1为1111
    // h & (length-1): 保证了元素在数组table中的位置取决于hash code的值(二进制都为1，结果才为1, length-1的所有二进制位都是1)。
    // 同时也保证了得到的数组table的索引不会越界(length-1也是int，除了低位全部为1,高位全部为0. )。
    return h & (length-1);
}

到此，顺着resize方法的线路分析就结束了。我们回到void addEntry(int hash, K key, V value, int bucketIndex)方法。

addEntry方法的后半部分

void addEntry(int hash, K key, V value, int bucketIndex) {
    if ((size >= threshold) && (null != table[bucketIndex])) {
        // 上面分析过了
        resize(2 * table.length);
        
        // 当key为null时，hasd code=0； 否则使用hash函数计算出hash code
        hash = (null != key) ? hash(key) : 0;
        // 计算插入位置
        bucketIndex = indexFor(hash, table.length);
    }
        
    createEntry(hash, key, value, bucketIndex);
}

hash()方法

final int hash(Object k) {
    // hash种子
    int h = hashSeed;
    // 忽略这里的，只有k为string才会生效
    if (0 != h && k instanceof String) {
        return sun.misc.Hashing.stringHash32((String) k);
    }

    // h为0, 和k的hashcode进行异或。异或的结果就是h的值等于k的hashcode
    h ^= k.hashCode();

    // This function ensures that hashCodes that differ only by
    // constant multiples at each bit position have a bounded
    // number of collisions (approximately 8 at default load factor).
    
    // 分析过程如下：假设h=1001 0001 1101 0000 0011 0001 1111 1001
    /**
     * 1. h >>> 20： 无符号右移20位，空高20位为0. 
     * 如下：
     * 1001 0001 1101 0000 0011 0001 1111 1001
     * 0000 0000 0000 0000 0000 1001 0001 1101
     /
     
     /**
     * 2. h >>> 12： 无符号右移12位，空高12位为0. 
     * 如下：
     * 1001 0001 1101 0000 0011 0001 1111 1001
     * 0000 0000 0000 1001 0001 1101 0000 0011
     /
     
     /** 3:(h >>> 20) ^ (h >>> 12)
     *  在1和2的基础上，1和2的再次异或的结果为：
     *  0000 0000 0000 0000 0000 1001 0001 1101
     *  0000 0000 0000 1001 0001 1101 0000 0011
     *  0000 0000 0000 1001 0001 0100 0001 1110 (结果)
     *  结果: 中间的8位保留了，低12位进行了异或运算
     */
     
     /** 4. 3的结果再次和h进行异或
     * 1001 0001 1101 0000 0011 0001 1111 1001 (h)
     * 0000 0000 0000 1001 0001 0100 0001 1110 (3步骤的结果)
     * 1001 0001 1101 1001 0010 0101 1110 0111 (结果)
     /
     
     /** 5. h >>> 7: h无符号右移7位, 空出高7位为0
     * 1001 0001 1101 1001 0010 0101 1110 0111
     * 0000 0001 0010 0011 1011 0010 0100 1011 (无符号右移7位的结果)
     /
     
     /** 6. h >>> 4: h无符号右移4位, 空出高4位为0
     * 1001 0001 1101 1001 0010 0101 1110 0111
     * 0000 1001 0001 1101 1001 0010 0101 1110 (无符号右移4位的结果)
     /
     
     /** 7: h ^ (h >>> 7) : 也就是第5步的结果和h进行异或
     * 1001 0001 1101 1001 0010 0101 1110 0111 (h, 第4步)
     * 0000 0001 0010 0011 1011 0010 0100 1011 (第5步结果)
     * 1001 0001 1111 1010 1001 0111 1010 1100 (结果)
     */
     
     /** 8: h ^ (h >>> 7) ^ (h >>> 4) : 也就是第7步的结果和第6位进行异或
     * 1001 0001 1111 1010 1001 0111 1010 1100 (第7步结果)
     * 0000 1001 0001 1101 1001 0010 0101 1110 (第6步结果)
     * 1001 1000 1110 0111 0000 0101 1111 0010 (结果)
     */
     
     /**
     *1001 0001 1101 0000 0011 0001 1111 1001 (h原值)
     *1001 1000 1110 0111 0000 0101 1111 0010 (h异或后)
     */
     
     
    // hash ()可以称为干扰函数，高位参与低位运算。使低位和整个hash值更加随机，减少hash碰撞。
    h ^= (h >>> 20) ^ (h >>> 12);
    return h ^ (h >>> 7) ^ (h >>> 4);
}

从indexFor的分析中，我们知道，通过h & (length-1)获取在entry[]数组中的索引。length总是2的n次方，length-1的结果总是低位全部为1。也就是说，Entry在数组中的位置取决于hash的低位。hashmap的默认大小为16。length-1=15 (0000 0000 0000 0000 0000 0000 0000 1111)；h & (length-1)也就是说只有h的低4位参与了运算。如果我们能使h的高位也参与运算，能更加使低位随机，也就减少了hash的碰撞。那么，怎么使h的高位也参与低位的运算呢。在jdk7中的hash()函数，通过位移+异或的方式，如上面的代码。

addEntry方法中的hash方法也分析完了，我们接着分析createEntry(hash, key, value, bucketIndex)方法。

createEntry(hash, key, value, bucketIndex)

void createEntry(int hash, K key, V value, int bucketIndex) {
    // 取出bucketIndex出的Entry. e可能为null，如果bucketIndex位置之前没有Entry
    Entry<K,V> e = table[bucketIndex];
    // 重新设置bucketIndex处的Entry，并把这个Entry的next指向e。
    // （会形成链表，新插入的Entry总是在bucketIndex处，也就是链表的头）
    table[bucketIndex] = new Entry<>(hash, key, value, e);
    // 记录元素个数
    size++;
}

至此，我们就分析完了put方法中的putForNullKey()过程。

2.5 Key不为null时

put()方法

public V put(K key, V value) {
    if (table == EMPTY_TABLE) {
        inflateTable(threshold);
    }
    if (key == null)
        // 上面分析过了
        return putForNullKey(value);
    // 获取key的hash code
    int hash = hash(key);
    // 计算索引
    int i = indexFor(hash, table.length);
    // 遍历链表，判断Key是否已经存在，如果存在替换Value
    for (Entry<K,V> e = table[i]; e != null; e = e.next) {
        Object k;
        if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
            V oldValue = e.value;
            e.value = value;
            e.recordAccess(this);
            // 返回旧值
            return oldValue;
        }
    }

    // 记录修改次数
    modCount++;
    // 如果key不存在，添加Key-Value
    addEntry(hash, key, value, i);
    return null;
}

2.6 get方法

get(key)

public V get(Object key) {
    if (key == null)
        // key = Null
        return getForNullKey();
        
    // key != null时
    Entry<K,V> entry = getEntry(key);
    return null == entry ? null : entry.getValue();
}

getForNullKey()

private V getForNullKey() {
    if (size == 0) {
        return null;
    }
    // 只需要遍历tablep[0]，因为key=null时，计算出的index一定为0
    for (Entry<K,V> e = table[0]; e != null; e = e.next) {
        if (e.key == null)
            return e.value;
    }
    return null;
}

getEntry(key)

final Entry<K,V> getEntry(Object key) {
    if (size == 0) {
        return null;
    }
    
    // 计算hash
    int hash = (key == null) ? 0 : hash(key);
    // 先通过indexFor计算出数组下标，然后遍历链表，比较key的hash值和key == /equals
    for (Entry<K,V> e = table[indexFor(hash, table.length)];
         e != null;
         e = e.next) {
        Object k;
        if (e.hash == hash &&
            ((k = e.key) == key || (key != null && key.equals(k))))
            return e;
    }
    return null;
}

3. 面试常见问题总结

HashMap的put和get的过程?
为什么hashmap的大小一定要保持2的n次方？
hashmap是否线程安全？怎么保证线程安全？有没有更好的方案？
hashmap在多线程环境可能会死循环，导致占用cpu 100%，为什么会出现这样的情况？出现在哪一步？
怎么尽可能减少hash冲突？有什么方案可以优化不？
jdk1.7和1.8 hashmap的实现有什么变化？怎么变化的？
jdk1.8 hashmap如何利用红黑树实现链表过长时，提高查询，删除，插入效率的？
延伸到ConcurrentHashMap，不拉不拉继续问一大堆...
自己实现个hashmap?
...

HashMap源码详细分析