本文相关源码:
java.lang.Class
java.lang.Thread
art/runtime/native/java_lang_Class.cc
art/runtime/native/java_lang_Object.cc
art/runtime/native/java_lang_Thread.cc
art/runtime/class_linker.cc
art/runtime/lock_word.h
art/runtime/monitor.cc
art/runtime/mirror/object.cc
其中 art/runtime/native/ 目录下是 jni 相关代码,art/runtime/mirror 下是 class、object 等在 native 层的实现。
一、本文目的
本文将分析 ART 虚拟机层面 synchronized 的实现机制,并通过 Demo 验证:当死锁发生时,如何找出等待锁的相关线程。
二、Art synchronized 实现
2.1 Object、LockWord、Monitor、Thread 之间的关系
对象 hashcode 和锁的信息存储于对象头中,Art 中其实现为 LockWord,art/runtime/lock_word.h 文件中有对其数据的详细说明:
/* The lock value itself as stored in mirror::Object::monitor_. The two most significant bits
* encode the state. The four possible states are fat locked, thin/unlocked, hash code, and
* forwarding address.
*
* When the lock word is in the "thin" state and its bits are formatted as follows:
*
* |33|2|2|222222221111|1111110000000000|
* |10|9|8|765432109876|5432109876543210|
* |00|m|r| lock count |thread id owner |
*
* The lock count is zero, but the owner is nonzero for a simply held lock.
* When the lock word is in the "fat" state and its bits are formatted as follows:
*
* |33|2|2|2222222211111111110000000000|
* |10|9|8|7654321098765432109876543210|
* |01|m|r| MonitorId |
*
* When the lock word is in hash state and its bits are formatted as follows:
*
* |33|2|2|2222222211111111110000000000|
* |10|9|8|7654321098765432109876543210|
* |10|m|r| HashCode |
*
* When the lock word is in forwarding address state and its bits are formatted as follows:
*
* |33|2|22222222211111111110000000000|
* |10|9|87654321098765432109876543210|
* |11|0| ForwardingAddress |
*
* The `r` bit stores the read barrier state.
* The `m` bit stores the mark bit state.
*/
这段说明表明了 mirror::Object、LockWord 和 Monitor 之间的关系。“The lock value itself as stored in mirror::Object::monitor_” 说明了 LockWord 与 mirror::Object 的关系:Object 中的 monitor_ 存储的就是一个 LockWord。当锁膨胀为 fat 状态时会创建一个 Monitor,此时 LockWord 的状态位为 01,其中的 MonitorId 指向该 Monitor。
Monitor 的 owner_ 成员记录着当前持有锁的线程(一个 Thread 指针),其线程 id 可通过该 Thread 的 GetThreadId() 取得。
// Which thread currently owns the lock? monitor_lock_ only keeps the tid.
// Only set while holding monitor_lock_. Non-locking readers only use it to
// compare to self or for debugging.
std::atomic<Thread*> owner_;
2.2 锁膨胀过程
锁膨胀的整体过程见上图。
InflateThinLocked 即为锁膨胀逻辑,它的触发条件有两个:
- lock count 次数超过 kThinLockMaxCount
- LockWord 中记录的持有线程 ID 不是当前线程,即发生了锁竞争
Inflate 的时候创建 Monitor,Install 时创建一个关联当前 Monitor 的新 LockWord 并尝试通过 cas 替换 Object 的 monitor_。
三、死锁监控
3.1 判断死锁逻辑
3.1.1 判断死锁逻辑
在写具体的代码之前,先理清楚:如果发生了死锁,怎么找出相关线程,以及如何判断是否死锁?
- 找出处于 Blocked 状态线程
- 找出 Blocked 线程所等待的锁
- 找出锁的持有线程
- 判断是否死锁
3.1.2 相关 Api
上面列的步骤里,除了第四步,前三步都有相关 Api。根据前三步的数据,第四步可以自己计算。
找出处于 Blocked 状态线程:
// Walk every thread returned by getAllThread() and select those in the BLOCKED state.
val threads = getAllThread()
threads.forEach {
if (it?.isAlive == true && it.state == Thread.State.BLOCKED) {
// `it` is alive and BLOCKED at this point; the body is intentionally left empty in this snippet.
}
}
找出线程所竞争的锁:
// Returns the object whose monitor `thread` is contending for: the object it is trying to enter
// (GetMonitorEnterObject) or, if there is none, the object of the monitor it is waiting on
// (GetWaitMonitor, read while holding the thread's wait mutex). Returns null when neither exists.
ObjPtr<mirror::Object> Monitor::GetContendedMonitor(Thread* thread) {
// This is used to implement JDWP's ThreadReference.CurrentContendedMonitor, and has a bizarre
// definition of contended that includes a monitor a thread is trying to enter...
ObjPtr<mirror::Object> result = thread->GetMonitorEnterObject();
if (result == nullptr) {
// ...but also a monitor that the thread is waiting on.
MutexLock mu(Thread::Current(), *thread->GetWaitMutex());
Monitor* monitor = thread->GetWaitMonitor();
if (monitor != nullptr) {
result = monitor->GetObject();
}
}
return result;
}
找出持有当前锁的线程:
// Returns the thread id of this monitor's current owner, or ThreadList::kInvalidThreadId when
// GetOwner() reports no owner. Locks::thread_list_lock_ is held for the whole lookup.
uint32_t Monitor::GetOwnerThreadId() {
// Make sure owner is not deallocated during access.
MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
Thread* owner = GetOwner();
if (owner != nullptr) {
return owner->GetThreadId();
} else {
return ThreadList::kInvalidThreadId;
}
}
3.2 一些问题
Api 有了,但把这些 Api 组合到一起工程化并输出正确结果还有一些问题。
- 获取所有线程在 java 层,Monitor 相关方法在 native,java Thread 与 native Thread 怎么关联起来
- 怎么去调用 native 层相关方法
3.2.1 java Thread 与 native Thread
java Thread 有个成员变量 nativePeer,native 层与其对应的是 Thread 对象指针。 在 Thread::CreateNativeThread 时会创建相关 Thread 对象指针并将其赋值给 java 层。
// Assigned from native code via SetNativePeer in Thread::CreateNativeThread; it is 0 before that assignment.
private volatile long nativePeer;
// Excerpt only: each "..." line stands for code omitted from the full function.
// Allocates the native Thread for a starting Java thread, pins the Java peer with a global
// reference, and stores the new Thread in the peer through SetNativePeer.
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
CHECK(java_peer != nullptr);
...
Thread* child_thread = new Thread(is_daemon);
// Use global JNI ref to hold peer live while child thread starts.
child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);
stack_size = FixStackSize(stack_size);
// Thread.start is synchronized, so we know that nativePeer is 0, and know that we're not racing
// to assign it.
SetNativePeer(env, java_peer, child_thread);
...
}
3.2.2 相关方法符号与方法调用
方法符号
相关方法的符号可以通过 objdump 在 libart.so 中找到,这里直接贴出来。
GetContendedMonitor 符号:
_ZN3art7Monitor19GetContendedMonitorEPNS_6ThreadE
GetLockOwnerThreadId 符号:
//SDK version < 29
"_ZN3art7Monitor20GetLockOwnerThreadIdEPNS_6mirror6ObjectE";
//SDK version >= 29
"_ZN3art7Monitor20GetLockOwnerThreadIdENS_6ObjPtrINS_6mirror6ObjectEEE";
方法调用
可以通过 dlsym 调用相关方法:
void* dlsym(void* __handle, const char* __symbol);
Android 7.0 起,Google 限制 App 通过 dlopen()、dlsym() 访问 libart.so 等非公开系统库,不过可以绕过。本文相关代码使用的是 ndk_dlopen,具体原理可以看其介绍。
3.3 源码
java 层:
/**
 * Prints, for every BLOCKED thread in the current thread group, which thread id owns the
 * lock it is waiting for.
 *
 * The thread's private `nativePeer` field is read by reflection and handed to the native
 * helpers [nativePeer2ThreadId] and [waitingForThreadId].
 */
private fun deadLockMonitor() {
    val threadGroup = Thread.currentThread().threadGroup
    val threads = arrayOfNulls<Thread>(Thread.activeCount())
    threadGroup?.enumerate(threads)

    val blockedThreads = threads
        .filterNotNull()
        .filter { it.isAlive && it.state == Thread.State.BLOCKED }
    if (blockedThreads.isEmpty()) {
        // Nothing to report; skip the reflective lookup entirely.
        return
    }

    // Resolve the reflective handle once instead of once per blocked thread.
    val nativePeerField = Thread::class.java.getDeclaredField("nativePeer")
    nativePeerField.isAccessible = true

    blockedThreads.forEach { thread ->
        val currentNativePeer = nativePeerField.getLong(thread)
        // A zero peer carries no native Thread address; the native side would otherwise
        // treat it as a pointer. Presumably this happens when the thread has just exited.
        if (currentNativePeer == 0L) {
            return@forEach
        }
        val currentThreadId = nativePeer2ThreadId(currentNativePeer)
        val monitorThreadID = waitingForThreadId(currentNativePeer)
        println("$TAG current thread $currentThreadId is waiting for thread $monitorThreadID")
    }
}
/** Implemented in native code: given a thread's `nativePeer` value, returns the thread id read from the native Thread object. */
private external fun nativePeer2ThreadId(nativePeer: Long): Int
/** Implemented in native code: returns the id of the thread owning the monitor that the given native thread is contending for, or 0 when it cannot be determined. */
private external fun waitingForThreadId(nativeThread: Long): Int
native 层:
// Platform API level; stays 0 until JNI_OnLoad fills it in.
static volatile int SDK_INT = 0;

/**
 * Picks the mangled name of art::Monitor::GetLockOwnerThreadId that matches the running
 * platform, based on SDK_INT.
 *
 * @return the symbol taking an ObjPtr<mirror::Object> on API level 29 and above, otherwise
 *         the symbol taking a raw mirror::Object pointer (API level 28 / Android 9 and below).
 */
const char *get_lock_owner_symbol_name() {
    if (SDK_INT >= 29) {
        return "_ZN3art7Monitor20GetLockOwnerThreadIdENS_6ObjPtrINS_6mirror6ObjectEEE";
    }
    return "_ZN3art7Monitor20GetLockOwnerThreadIdEPNS_6mirror6ObjectE";
}
/**
 * JNI implementation of MainActivity.nativePeer2ThreadId.
 *
 * Treats native_peer as the address of the native Thread object and reads the thread id
 * out of it.
 *
 * @param native_peer value of java.lang.Thread.nativePeer for the thread of interest
 * @return the int stored at index 3 of the object, or 0 when native_peer is 0 or the
 *         platform API level is 20 or lower (previously these paths fell off the end of
 *         the function without returning a value, which is undefined behaviour)
 */
extern "C"
JNIEXPORT jint JNICALL
Java_com_example_deadblocktest_MainActivity_nativePeer2ThreadId(JNIEnv *env, jobject thiz,
                                                                jlong native_peer) {
    if (native_peer == 0 || SDK_INT <= 20) {
        return 0;
    }
    // View the peer address as an array of ints (a pointer cast, not a value cast).
    const int *words = reinterpret_cast<const int *>(native_peer);
    // NOTE(review): index 3 is taken to be the thread id; this relies on ART's private
    // Thread layout and should be confirmed for each Android version supported.
    return words[3];
}
/**
 * JNI implementation of MainActivity.waitingForThreadId.
 *
 * Resolves art::Monitor::GetContendedMonitor and art::Monitor::GetLockOwnerThreadId from
 * libart.so, asks the first for the object the given thread is contending for, and asks
 * the second for the id of the thread that owns that object's lock.
 *
 * @param native_thread address of the native Thread (java.lang.Thread.nativePeer)
 * @return the owner thread id, or 0 when native_thread is 0, libart.so or either symbol
 *         cannot be resolved, or the thread is not contending for any monitor
 */
extern "C"
JNIEXPORT jint JNICALL
Java_com_example_deadblocktest_MainActivity_waitingForThreadId(JNIEnv *env, jobject thiz,
                                                               jlong native_thread) {
    // A zero peer would be handed to ART as a Thread pointer; bail out instead.
    if (native_thread == 0) {
        return 0;
    }

    // 1. Initialise the dlopen helper and get a handle to the already-loaded libart.so.
    ndk_init(env);
    void *so_addr = ndk_dlopen("libart.so", RTLD_NOLOAD);
    if (so_addr == nullptr) {
        return 0;
    }

    // Monitor::GetContendedMonitor
    void *get_contended_monitor = ndk_dlsym(so_addr, "_ZN3art7Monitor19GetContendedMonitorEPNS_6ThreadE");
    if (get_contended_monitor == nullptr) {
        return 0;
    }

    // Monitor::GetLockOwnerThreadId
    void *get_lock_owner_thread_id = ndk_dlsym(so_addr, get_lock_owner_symbol_name());
    if (get_lock_owner_thread_id == nullptr) {
        return 0;
    }

    // Both functions traffic in object/thread addresses, so model them with pointer-sized
    // types. Using int here would truncate the monitor object address on 64-bit ABIs.
    // NOTE(review): on API >= 29 the object is an ObjPtr<mirror::Object>; this assumes it
    // is passed and returned like a plain pointer - confirm against the target ART build.
    using GetContendedMonitorFn = void *(*)(void *);
    using GetLockOwnerThreadIdFn = unsigned int (*)(void *);

    // Fetch the monitor object the thread is contending for.
    void *monitor_obj = reinterpret_cast<GetContendedMonitorFn>(get_contended_monitor)(
            reinterpret_cast<void *>(native_thread));
    if (monitor_obj == nullptr) {
        return 0;
    }

    // Ask which thread currently owns that monitor.
    return static_cast<jint>(
            reinterpret_cast<GetLockOwnerThreadIdFn>(get_lock_owner_thread_id)(monitor_obj));
}
/**
 * Library load hook: reads the "ro.build.version.sdk" system property, stores its integer
 * value in SDK_INT, and reports JNI_VERSION_1_4 as the required JNI version.
 */
extern "C" jint JNI_OnLoad(JavaVM *vm, void *reserved) {
    char api_level_text[PROP_VALUE_MAX];
    __system_property_get("ro.build.version.sdk", api_level_text);
    SDK_INT = atoi(api_level_text);
    return JNI_VERSION_1_4;
}
结果如下: