感言
看了一些文章,觉得大佬还得是大佬。写这篇文章之前,原本是没有打算写它的;在为前一篇文章查 baidu 时看到了几篇分析死锁的文章,看了文章的质量,感叹不已。虽然对 Android 系统有一点点了解,但解决问题时还是习惯先搜索一波,而大佬已经在源码的世界里面遨游。学得越多才知道自己有多菜。
这篇文章的思路内容大致都是 copy 的,只是将其写出来而已。所以先给出网上的资料
Thread monitoring: deadlock, lifetime and CPU utilization
原理
主要用到两个函数
http://androidxref.com/9.0.0_r3/xref/art/runtime/monitor.cc
// Returns the object whose monitor the given thread is currently contending for:
// either the object the thread is trying to enter, or, failing that, the object of the
// monitor the thread is waiting on. Returns nullptr when neither exists.
mirror::Object* Monitor::GetContendedMonitor(Thread* thread) {
// This is used to implement JDWP's ThreadReference.CurrentContendedMonitor, and has a bizarre
// definition of contended that includes a monitor a thread is trying to enter...
mirror::Object* result = thread->GetMonitorEnterObject();
if (result == nullptr) {
// ...but also a monitor that the thread is waiting on.
// The target thread's wait mutex is held (on behalf of the calling thread) while its wait
// monitor is read.
MutexLock mu(Thread::Current(), *thread->GetWaitMutex());
Monitor* monitor = thread->GetWaitMonitor();
if (monitor != nullptr) {
result = monitor->GetObject();
}
}
return result;
}
// Returns the id of the thread that currently owns the lock of `obj`, derived from the
// object's lock word. Returns ThreadList::kInvalidThreadId when the lock word is in the
// hash-code or unlocked state, i.e. when no thread owns the lock.
uint32_t Monitor::GetLockOwnerThreadId(mirror::Object* obj) {
DCHECK(obj != nullptr);
LockWord lock_word = obj->GetLockWord(true);
switch (lock_word.GetState()) {
case LockWord::kHashCode:
// Fall-through.
case LockWord::kUnlocked:
return ThreadList::kInvalidThreadId;
case LockWord::kThinLocked:
// Thin lock: the owner id is read directly from the lock word.
return lock_word.ThinLockOwner();
case LockWord::kFatLocked: {
// Fat lock: the lock word refers to a Monitor, which is asked for its owner.
Monitor* mon = lock_word.FatLockMonitor();
return mon->GetOwnerThreadId();
}
default: {
LOG(FATAL) << "Unreachable";
UNREACHABLE();
}
}
}
Monitor
Monitor是一种并发控制机制,提供多线程环境下的互斥和同步,以支持安全的并发访问。
Monitor由以下3个元素组成:
- 临界区:例如 synchronized 修饰的代码块
- 条件变量:用来维护因不满足条件而阻塞的线程队列
- Monitor对象,维护Monitor的入口、临界区互斥量(即锁)、临界区和条件变量,以及条件变量上的阻塞和唤醒
GetContendedMonitor()
函数用来获取 thread 正在竞争的锁(等待进入的锁,或正在其上 wait 的锁),而不是它已经持有的锁;GetLockOwnerThreadId()
函数用来获取持有该锁的线程 id
步骤:
- 获取所有的线程,判断其状态
- 遍历状态为 BLOCKED 的线程,获取其 native 的线程指针,使用上面的两个函数找到持有锁的线程
- 通过关系链,判断死锁的线程,输出堆栈信息
代码实现
// Address of art::Monitor::GetLockOwnerThreadId as resolved from libart.so by deadLockInit();
// remains null until that lookup has succeeded.
void *getLockOwnerThreadId;
// Address of art::Monitor::GetContendedMonitor as resolved from libart.so by deadLockInit();
// remains null until that lookup has succeeded.
void *getContendedMonitor;
/**
 * Resolves the two ART functions used for deadlock detection and stores their addresses in
 * the file-level pointers getContendedMonitor and getLockOwnerThreadId.
 *
 * ndk_init(), ndk_dlopen() and ndk_dlsym() are used instead of the plain dl* functions
 * because, starting with Android 7.0, the system restricts apps from opening system
 * libraries through dlopen()/dlsym().
 *
 * On any failure an error is logged and the function returns early; a pointer that could not
 * be resolved is left null, which getBolckThreadId() checks for.
 *
 * @param env        JNI environment, forwarded to ndk_init().
 * @param clazz      unused.
 * @param sdkVersion device API level; selects which mangled name of GetLockOwnerThreadId is
 *                   looked up (the mirror::Object* form below API 29, the ObjPtr form from
 *                   API 29 on).
 */
extern "C"
JNIEXPORT void JNICALL
Java_com_dabaicai_miniter_1hook_NativeBridge_deadLockInit(JNIEnv *env, jclass clazz, jint sdkVersion) {
    ndk_init(env);

    void *so_addr = ndk_dlopen("libart.so", RTLD_LAZY);
    if (so_addr == nullptr) {
        __android_log_print(ANDROID_LOG_ERROR, "native-hook", "dlopen libart.so fail");
        return;
    }

    // GetContendedMonitor: yields the lock a thread is currently contending for.
    getContendedMonitor = ndk_dlsym(so_addr, "_ZN3art7Monitor19GetContendedMonitorEPNS_6ThreadE");
    if (getContendedMonitor == nullptr) {
        __android_log_print(ANDROID_LOG_ERROR, "native-hook", "ndk_dlsym GetContendedMonitor fail");
        return;
    }

    // GetLockOwnerThreadId: yields the id of the thread that owns a given lock.
    // The symbol name is a string literal, so it must be held through a pointer-to-const
    // (binding a literal to a plain `char *` is not valid C++11).
    const char *symbol = (sdkVersion < 29)
            ? "_ZN3art7Monitor20GetLockOwnerThreadIdEPNS_6mirror6ObjectE"
            : "_ZN3art7Monitor20GetLockOwnerThreadIdENS_6ObjPtrINS_6mirror6ObjectEEE";
    getLockOwnerThreadId = ndk_dlsym(so_addr, symbol);
    if (getLockOwnerThreadId == nullptr) {
        __android_log_print(ANDROID_LOG_ERROR, "native-hook", "ndk_dlsym getLockOwnerThreadId fail");
        return;
    }
}
/**
 * Returns the id of the thread that owns the lock the given thread is contending for.
 *
 * @param env                unused.
 * @param clazz              unused.
 * @param current_thread_ptr native art::Thread pointer of the thread to inspect (the value
 *                           of java.lang.Thread's nativePeer field).
 * @return the owner thread id reported by GetLockOwnerThreadId, or 0 when the pointer is 0,
 *         the ART functions have not been resolved, or the thread is not contending for any
 *         lock.
 */
extern "C"
JNIEXPORT jint JNICALL
Java_com_dabaicai_miniter_1hook_NativeBridge_getBolckThreadId(JNIEnv *env, jclass clazz, jlong current_thread_ptr) {
    if (current_thread_ptr == 0 || getLockOwnerThreadId == NULL || getContendedMonitor == NULL) {
        return 0;
    }

    // Both ART functions exchange an object pointer. It has to travel through pointer-sized
    // types: squeezing it through `int` would cut off the upper half on 64-bit ABIs.
    // NOTE(review): from API 29 the callee takes an ObjPtr<mirror::Object> by value; this
    // assumes it is passed like a plain pointer-sized value — confirm against the targeted
    // ART builds.
    using GetContendedMonitorFn = void *(*)(void *);
    using GetLockOwnerThreadIdFn = uint32_t (*)(void *);

    void *nativeThread = reinterpret_cast<void *>(static_cast<uintptr_t>(current_thread_ptr));
    void *monitorObj = reinterpret_cast<GetContendedMonitorFn>(getContendedMonitor)(nativeThread);
    if (monitorObj == nullptr) {
        return 0;
    }

    const uint32_t ownerThreadId =
            reinterpret_cast<GetLockOwnerThreadIdFn>(getLockOwnerThreadId)(monitorObj);
    return static_cast<jint>(ownerThreadId);
}
/**
 * Reads a thread id straight out of the native thread object.
 *
 * @param env                unused.
 * @param clazz              unused.
 * @param current_thread_ptr native thread pointer (java.lang.Thread's nativePeer value).
 * @param sdkVersion         device API level; the read is only attempted above API 20
 *                           (the original note says "greater than 5.0").
 * @return the int stored three ints past the start of the native thread object, or 0 when
 *         the pointer is 0 or the API level is 20 or lower.
 */
extern "C"
JNIEXPORT jint JNICALL
Java_com_dabaicai_miniter_1hook_NativeBridge_getCurrentThreadId(JNIEnv *env, jclass clazz, jlong current_thread_ptr, jint sdkVersion) {
    if (current_thread_ptr == 0 || sdkVersion <= 20) {
        return 0;
    }
    // NOTE(review): presumably index 3 is where art::Thread keeps its thin-lock thread id —
    // verify the layout for every ART version this is meant to run on.
    const int *threadWords = reinterpret_cast<int *>(current_thread_ptr);
    return threadWords[3];
}
// 1. Initialize the native side, passing the device API level so that it can pick the
//    matching ART symbol names.
NativeBridge.deadLockInit(Build.VERSION.SDK_INT);
// 2. Take a snapshot of all threads and check which ones are BLOCKED.
// 3. For every BLOCKED thread, record its own native thread id together with the id of the
//    thread that owns the lock it is contending for.
/**
 * Rebuilds deadLockThreadMap from the currently BLOCKED threads and then runs
 * analysisDeadLock() on it.
 *
 * Threads are skipped when their native peer is unavailable or when either native lookup
 * returns 0, the value the native side uses for "could not be determined". Storing such
 * entries would file unrelated threads under the same key 0 and could fabricate a 0 -> 0
 * cycle.
 */
public void moniterDeadLock() {
    deadLockThreadMap = new HashMap<>();
    for (Thread thread : Thread.getAllStackTraces().keySet()) {
        if (thread.getState() != BLOCKED) {
            continue;
        }
        long ptr = getThreadnativePeer(thread);
        if (ptr == 0) {
            continue;
        }
        int blockThread = NativeBridge.getBolckThreadId(ptr);
        int curThread = NativeBridge.getCurrentThreadId(ptr, Build.VERSION.SDK_INT);
        if (curThread == 0 || blockThread == 0) {
            // One of the native lookups failed; there is no usable wait-for edge.
            continue;
        }
        deadLockThreadMap.put(curThread, new DeadLockThread(thread, curThread, blockThread));
    }
    analysisDeadLock();
}
// 4. Analyse the collected wait-for relations.
/**
 * Groups the entries of deadLockThreadMap by following each thread's "blocked by" link and
 * logs, for every group that really ends in a cycle, the name and stack trace of each blocked
 * thread and of the thread it is waiting for.
 *
 * A chain that ends at a thread which is not itself blocked is ordinary lock contention and
 * is not reported. Each blocked thread is reported at most once.
 */
private void analysisDeadLock() {
    Set<Integer> visited = new HashSet<>();
    List<Map<Integer, Thread>> deadLockLists = new ArrayList<>();
    for (Integer threadId : deadLockThreadMap.keySet()) {
        if (visited.contains(threadId)) {
            continue;
        }
        Map<Integer, Thread> group = collectDeadLockGroup(threadId, new HashMap<Integer, Thread>());
        visited.addAll(group.keySet());
        if (isDeadLockCycle(group)) {
            deadLockLists.add(group);
        }
    }

    // Groups started from different threads may overlap; remember what was already logged.
    Set<Integer> reported = new HashSet<>();
    for (Map<Integer, Thread> group : deadLockLists) {
        for (Integer blockThreadId : group.keySet()) {
            DeadLockThread deadLockThread = deadLockThreadMap.get(blockThreadId);
            if (deadLockThread == null || !reported.add(blockThreadId)) {
                continue;
            }
            Thread curThread = group.get(deadLockThread.curThread);
            Thread blockThread = group.get(deadLockThread.blockThread);
            if (curThread == null || blockThread == null) {
                continue;
            }
            logImpl.Loge(TAG, "find dead lock,current thread name is " + curThread.getName());
            logImpl.Loge(TAG, dumpStackTrace(curThread));
            logImpl.Loge(TAG, "find dead lock,wait thread name is " + blockThread.getName());
            logImpl.Loge(TAG, dumpStackTrace(blockThread));
        }
    }
}

/**
 * Follows the "blocked by" links starting at threadId and adds every blocked thread met on
 * the way to deadLockMap (thread id -> Thread).
 *
 * The walk stops when it reaches a thread that is already in deadLockMap (the chain closed
 * into a cycle) or a thread that has no entry in deadLockThreadMap (the lock owner is not
 * blocked, so the chain simply ends there).
 *
 * @param threadId    id of the blocked thread to start from
 * @param deadLockMap accumulator; it is filled in place and also returned
 * @return deadLockMap
 */
private Map<Integer, Thread> collectDeadLockGroup(Integer threadId, Map<Integer, Thread> deadLockMap) {
    Integer current = threadId;
    while (current != null && !deadLockMap.containsKey(current)) {
        DeadLockThread node = deadLockThreadMap.get(current);
        if (node == null) {
            break;
        }
        deadLockMap.put(current, node.thread);
        current = node.blockThread;
    }
    return deadLockMap;
}

/** Returns true when every thread of the group is blocked by another member of the group. */
private boolean isDeadLockCycle(Map<Integer, Thread> group) {
    if (group.isEmpty()) {
        return false;
    }
    for (Integer threadId : group.keySet()) {
        DeadLockThread node = deadLockThreadMap.get(threadId);
        if (node == null || !group.containsKey(node.blockThread)) {
            return false;
        }
    }
    return true;
}

/** Renders the current stack trace of the thread with one frame per line. */
private String dumpStackTrace(Thread thread) {
    StringBuilder stringBuilder = new StringBuilder();
    for (StackTraceElement stackTraceElement : thread.getStackTrace()) {
        stringBuilder.append(stackTraceElement).append('\n');
    }
    return stringBuilder.toString();
}
/**
 * Reads the private "nativePeer" field of the given thread through reflection.
 *
 * @param thread the thread to inspect
 * @return the value of the field, or 0 when the field does not exist or cannot be accessed
 *         (the exception's stack trace is printed in that case)
 */
private long getThreadnativePeer(Thread thread) {
    try {
        Field peerField = Thread.class.getDeclaredField("nativePeer");
        peerField.setAccessible(true);
        return (long) peerField.get(thread);
    } catch (NoSuchFieldException | IllegalAccessException e) {
        e.printStackTrace();
        return 0;
    }
}
/**
 * One wait-for edge: a blocked thread together with the id of the thread that owns the lock
 * it is contending for.
 */
class DeadLockThread {
    // Id of the blocked thread itself, as returned by NativeBridge.getCurrentThreadId().
    int curThread;
    // Id of the lock owner, as returned by NativeBridge.getBolckThreadId().
    int blockThread;
    // The blocked Java thread.
    Thread thread;

    public DeadLockThread(Thread thread, int curThread, int blockThread) {
        this.curThread = curThread;
        this.blockThread = blockThread;
        this.thread = thread;
    }
}
//dlopen.h
/*
*
* @author : rrrfff@foxmail.com
* https://github.com/rrrfff/ndk_dlopen
*
*/
#pragma once
#include <jni.h>
#include <dlfcn.h>
/* The declarations get C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* Must be called before the other functions: determines the device SDK level and, on
 * SDK 24 and above, prepares the stubs the wrappers below route their calls through. */
void ndk_init(JNIEnv *env);
/* dlopen() replacement; same parameters as dlopen(). */
void *ndk_dlopen(const char *filename, int flag);
/* dlclose() replacement; same parameter as dlclose(). */
int ndk_dlclose(void *handle);
/* dlerror() replacement. */
const char *ndk_dlerror(void);
/* dlsym() replacement; same parameters as dlsym(). */
void *ndk_dlsym(void *handle, const char *symbol);
/* dladdr() replacement; same parameters as dladdr(). */
int ndk_dladdr(const void *ddr, Dl_info *info);
#ifdef __cplusplus
}
#endif
//dlopen.c
/*
*
* @author : rrrfff@foxmail.com
* https://github.com/rrrfff/ndk_dlopen
*
*/
#include "dlopen.h"
#include <stdlib.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/system_properties.h>
#include <android/log.h>
#include <string.h>
#include <unistd.h>
/* Logcat tag and info-level logging helper used by this file. */
#define LOG_TAG "ndk_dlopen"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
/* Device SDK level; stays 0 until ndk_init() has read it from the system properties. */
static volatile int SDK_INT = 0;
/* Address handed to the forwarding stub as its third argument ("fake_trampoline");
 * set by ndk_init() to the patched code inside the JNI FatalError function. */
static void *quick_on_stack_back;
/* The forwarding stub, viewed either as a raw pointer (for set-up and logging) or as a
 * callable taking the two real arguments, the fake return address and the function to call. */
static union {
void *generic_stub;
void *(*quick_on_stack_replace)(const void *param1, const void *param2,
const void *fake_trampoline, const void *called);
} STUBS;
/*
 * One-time set-up for the ndk_* wrappers.
 *
 * The body only runs while SDK_INT is still <= 0. It reads the "ro.build.version.sdk" system
 * property into SDK_INT and, when that value is >= 24, installs two pieces of machine code:
 *   - a forwarding stub, copied into a static page-aligned buffer and exposed through
 *     STUBS.quick_on_stack_replace;
 *   - a return stub, copied over the first bytes of the function that env's FatalError entry
 *     points to; its address is kept in quick_on_stack_back.
 * The stub bytes are selected per target architecture by the #if chain below; an unsupported
 * architecture fails the build.
 *
 * env: JNI environment; only its FatalError function pointer is read.
 *
 * NOTE(review): the results of __system_property_get() and of both mprotect() calls are not
 * checked, and no instruction-cache flush follows the memcpy() of code bytes — confirm this
 * is acceptable, in particular on the ARM targets.
 * NOTE(review): the FatalError implementation is overwritten in place, so presumably JNI
 * FatalError must not be relied on afterwards — confirm.
 */
void JNIEXPORT
ndk_init(JNIEnv
*env) {
if (SDK_INT <= 0) {
char sdk[PROP_VALUE_MAX];
__system_property_get("ro.build.version.sdk", sdk);
SDK_INT = atoi(sdk);
LOGI("SDK_INT = %d", SDK_INT);
if (SDK_INT >= 24) {
// Page-sized, page-aligned buffer that receives the forwarding stub; it is made
// readable, writable and executable right below.
static __attribute__((__aligned__(PAGE_SIZE))) uint8_t __insns[PAGE_SIZE];
STUBS.
generic_stub = __insns;
mprotect(__insns,
sizeof(__insns), PROT_READ | PROT_WRITE | PROT_EXEC);
// we are currently hijacking "FatalError" as a fake system-call trampoline
uintptr_t pv = (uintptr_t) (*env)->FatalError;
// pu: first page boundary above pv; pd: start of the page that contains pv.
uintptr_t pu = (pv | (PAGE_SIZE - 1)) + 1u;
uintptr_t pd = (pv & ~(PAGE_SIZE - 1));
// Unprotect two pages instead of one when the 8 bytes starting at pv reach the next page.
mprotect((void *) pd, pv + 8u >= pu ? PAGE_SIZE * 2u : PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
quick_on_stack_back = (void *) pv;
#if defined(__i386__)
/*
DEFINE_FUNCTION art_quick_on_stack_replace
movl 12(REG_VAR(esp)), REG_VAR(eax)
movl (REG_VAR(esp)), REG_VAR(edx)
movl REG_VAR(eax), (REG_VAR(esp))
movl REG_VAR(edx), 12(REG_VAR(esp))
pushl 16(REG_VAR(esp))
ret
END_FUNCTION art_quick_on_stack_replace
*/
memcpy(__insns, "\x8B\x44\x24\x0C\x8B\x14\x24\x89\x04\x24\x89\x54\x24\x0C\xFF\x74\x24\x10\xC3", 19);
/*
DEFINE_FUNCTION art_quick_on_stack_back
push 8(REG_VAR(esp))
ret
END_FUNCTION art_quick_on_stack_back
*/
memcpy(quick_on_stack_back, "\xC3\xFF\x74\x24\x08\xC3", 6);
quick_on_stack_back = (void *) (pv + 1); // inserts `ret` at first
#elif defined(__x86_64__)
// rdi, rsi, rdx, rcx, r8, r9
/*
0x0000000000000000: 52 push rdx
0x0000000000000001: 52 push rdx
0x0000000000000002: FF E1 jmp rcx
*/
memcpy(__insns, "\x52\x52\xFF\xE1", 4);
/*
0x0000000000000000: 5A pop rdx
0x0000000000000000: C3 ret
*/
memcpy(quick_on_stack_back, "\x5A\xC3", 2);
#elif defined(__aarch64__)
// x0~x7
/*
0x0000000000000000: FD 7B BF A9 stp x29, x30, [sp, #-0x10]!
0x0000000000000004: FD 03 00 91 mov x29, sp
0x0000000000000008: FE 03 02 AA mov x30, x2
0x000000000000000C: 60 00 1F D6 br x3
*/
memcpy(__insns, "\xFD\x7B\xBF\xA9\xFD\x03\x00\x91\xFE\x03\x02\xAA\x60\x00\x1F\xD6", 16);
/*
0x0000000000000000: FD 7B C1 A8 ldp x29, x30, [sp], #0x10
0x0000000000000004: C0 03 5F D6 ret
*/
memcpy(quick_on_stack_back, "\xFD\x7B\xC1\xA8\xC0\x03\x5F\xD6", 8);
#elif defined(__arm__)
// r0~r3
/*
0x0000000000000000: 08 E0 2D E5 str lr, [sp, #-8]!
0x0000000000000004: 02 E0 A0 E1 mov lr, r2
0x0000000000000008: 13 FF 2F E1 bx r3
*/
memcpy(__insns, "\x08\xE0\x2D\xE5\x02\xE0\xA0\xE1\x13\xFF\x2F\xE1", 12);
// Bit 0 of the function address selects between the Thumb and the ARM encoding of the
// return stub; in the Thumb case the bytes are written at pv - 1 (presumably the real code
// address with the Thumb bit cleared — confirm).
if ((pv & 1u) != 0u) { // Thumb
/*
0x0000000000000000: 0C BC pop {r2, r3}
0x0000000000000002: 10 47 bx r2
*/
memcpy((void *)(pv - 1), "\x0C\xBC\x10\x47", 4);
} else {
/*
0x0000000000000000: 0C 00 BD E8 pop {r2, r3}
0x0000000000000004: 12 FF 2F E1 bx r2
*/
memcpy(quick_on_stack_back, "\x0C\x00\xBD\xE8\x12\xFF\x2F\xE1", 8);
} //if
#else
# error "not supported"
#endif
LOGI("init done! quick_on_stack_replace = %p, quick_on_stack_back = %p",
STUBS.generic_stub, quick_on_stack_back);
} //if
} //if
}
/*
 * dlopen() replacement.
 * Below SDK 24 this is a plain dlopen() call. From SDK 24 on, the call is routed through the
 * forwarding stub prepared by ndk_init(), with quick_on_stack_back as the fake return address.
 * Returns whatever dlopen() returns.
 */
void *JNIEXPORT
ndk_dlopen(const char *filename, int flag) {
    if (SDK_INT < 24) {
        return dlopen(filename, flag);
    }
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__arm__)
    return STUBS.quick_on_stack_replace(filename, (void *) flag,
                                        quick_on_stack_back, dlopen);
#else
# error "not supported"
#endif
}
/*
 * dlclose() replacement.
 * Below SDK 24 this is a plain dlclose() call. From SDK 24 on, the call is routed through the
 * forwarding stub prepared by ndk_init(); the unused second argument slot is NULL.
 * Returns whatever dlclose() returns.
 */
int JNIEXPORT
ndk_dlclose(void *handle) {
    if (SDK_INT < 24) {
        return dlclose(handle);
    }
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__arm__)
    return (int) STUBS.quick_on_stack_replace(handle, NULL,
                                              quick_on_stack_back, dlclose);
#else
# error "not supported"
#endif
}
/*
 * dlerror() replacement.
 * Below SDK 24 this is a plain dlerror() call. From SDK 24 on, the call is routed through the
 * forwarding stub prepared by ndk_init(); both argument slots are NULL.
 * Returns whatever dlerror() returns.
 */
const char *JNIEXPORT
ndk_dlerror(void) {
    if (SDK_INT < 24) {
        return dlerror();
    }
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__arm__)
    return STUBS.quick_on_stack_replace(NULL, NULL,
                                        quick_on_stack_back, dlerror);
#else
# error "not supported"
#endif
}
/*
 * dlsym() replacement.
 * Below SDK 24 this is a plain dlsym() call. From SDK 24 on, the call is routed through the
 * forwarding stub prepared by ndk_init(), with quick_on_stack_back as the fake return address.
 * Returns whatever dlsym() returns.
 */
void *JNIEXPORT
ndk_dlsym(void *handle, const char *symbol) {
    if (SDK_INT < 24) {
        return dlsym(handle, symbol);
    }
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__arm__)
    return STUBS.quick_on_stack_replace(handle, symbol,
                                        quick_on_stack_back, dlsym);
#else
# error "not supported"
#endif
}
/*
 * dladdr() replacement.
 * Below SDK 24 this is a plain dladdr() call. From SDK 24 on, the call is routed through the
 * forwarding stub prepared by ndk_init(), with quick_on_stack_back as the fake return address.
 * Returns whatever dladdr() returns.
 */
int JNIEXPORT
ndk_dladdr(const void *ddr, Dl_info *info) {
    if (SDK_INT < 24) {
        return dladdr(ddr, info);
    }
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__arm__)
    return (int) STUBS.quick_on_stack_replace(ddr, info,
                                              quick_on_stack_back, dladdr);
#else
# error "not supported"
#endif
}
测试代码
/**
 * Manual test for the deadlock monitor.
 *
 * Starts three threads that each take lock1, lock2 and lock3 in a different rotation
 * (1-2-3, 2-3-1, 3-1-2), pausing between acquisitions so that the threads end up waiting on
 * one another. After a short delay the thread states are logged and the monitor is run.
 *
 * NOTE(review): the states are sampled 100 ms after start while the threads pause via
 * sleep_(2); if sleep_ takes seconds the threads will not be BLOCKED yet at that point —
 * confirm the unit of sleep_.
 */
private void deadLockTest() {
    Thread thread1 = new Thread("threa1") {
        @Override
        public void run() {
            super.run();
            Log.e(TAG, "threa1 start");
            synchronized (lock1) {
                sleep_(2);
                synchronized (lock2) {
                    sleep_(2);
                    synchronized (lock3) {
                        Log.e(TAG, "threa1");
                    }
                }
            }
        }
    };
    Thread thread2 = new Thread("threa2") {
        @Override
        public void run() {
            super.run();
            Log.e(TAG, "threa2 start");
            synchronized (lock2) {
                sleep_(2);
                synchronized (lock3) {
                    sleep_(2);
                    synchronized (lock1) {
                        Log.e(TAG, "threa2");
                    }
                }
            }
        }
    };
    Thread thread3 = new Thread("threa3") {
        @Override
        public void run() {
            super.run();
            // Log this thread's own name (the start message used to say "threa2").
            Log.e(TAG, "threa3 start");
            synchronized (lock3) {
                sleep_(2);
                synchronized (lock1) {
                    sleep_(2);
                    synchronized (lock2) {
                        Log.e(TAG, "threa3");
                    }
                }
            }
        }
    };
    thread1.start();
    thread2.start();
    thread3.start();
    try {
        // Give the threads time to start before sampling their states.
        Thread.sleep(100);
        Log.e(TAG, "thread1 " + thread1.getState());
        Log.e(TAG, "thread2 " + thread2.getState());
        Log.e(TAG, "thread3 " + thread3.getState());
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
    getThreadHookMoniter().moniterDeadLock();
}
结果
WeChat592313554d8b518274f7c7cd7e3a8dcb.png