触发ANR条件
- Service Timeout:前台服务20s内未完成执行,后台服务超过200s
- BroadcastQueue Timeout:前台广播在10s内未完成,后台广播超过60s未完成
- ContentProvider Timeout:内容提供者在publish过程中超过10s未完成
- InputDispatching Timeout: 输入事件分发超时5s,包括按键和触摸事件
对应最后执行ANR记录的方法是:
- ActiveServices.serviceTimeout()
- BroadcastQueue.AppNotResponding.run().appNotResponding()
- AMS.appNotRespondingViaProvider()
- AMS.inputDispatchingTimedOut()
要理解为什么会触发ANR,必须先明白四大组件的启动流程,这部分我们以后再分析。
分析ANR触发之后记录问题
可以将anr目录下的文件放到电脑上进行查看
adb pull data/anr .
属性系统可以通过
adb shell getprop dalvik.vm.stack-trace-file
这种方式查找对应的属性值
当触发ANR之后会调用AppErrors.appNotResponding()方法
/**
 * Handles an ANR reported for {@code app}: logs it, dumps stack traces, records the result in
 * the dropbox, and then either kills the process (silent ANR) or asks the UI thread to show
 * the "not responding" dialog.
 *
 * NOTE(review): this is an excerpt; the lines marked "..." are elided, and
 * {@code isSilentANR} is presumably computed in one of the elided sections.
 *
 * @param app         process that stopped responding
 * @param activity    activity involved in the ANR, may be null
 * @param parent      parent activity, may be null
 * @param aboveSystem forwarded to the UI message as arg1 (1 when true, 0 otherwise)
 * @param annotation  human-readable reason for the ANR, may be null
 */
final void appNotResponding(ProcessRecord app, ActivityRecord activity,
        ActivityRecord parent, boolean aboveSystem, final String annotation) {
    ArrayList<Integer> firstPids = new ArrayList<Integer>(5);
    SparseArray<Boolean> lastPids = new SparseArray<Boolean>(20);
    ...
    // Remember when the ANR was detected; used later when printing CPU state.
    long anrTime = SystemClock.uptimeMillis();
    // Refresh CPU statistics right away.
    if (ActivityManagerService.MONITOR_CPU_USAGE) {
        mService.updateCpuStatsNow();
    }
    // Ignore the ANR in a few specific situations.
    synchronized (mService) {
        if (mService.mShuttingDown) {
            Slog.i(TAG, "During shutdown skipping ANR: " + app + " " + annotation);
            return;
        } else if (app.notResponding) {
            Slog.i(TAG, "Skipping duplicate ANR: " + app + " " + annotation);
            return;
        } else if (app.crashing) {
            Slog.i(TAG, "Crashing app skipping ANR: " + app + " " + annotation);
            return;
        }
    }
    // Mark the app so a second ANR for the same process hits the "duplicate" branch above.
    app.notResponding = true;
    // Write the ANR to the event log (am_anr).
    EventLog.writeEvent(EventLogTags.AM_ANR, app.userId, app.pid,
            app.processName, app.info.flags, annotation);
    // The ANR'ing process is the first pid whose stack gets dumped.
    firstPids.add(app.pid);
    // Where possible the parent process is added to firstPids as well (elided below).
    int parentPid = app.pid;
    ...
    // Build the ANR summary in `info`; it is printed to logcat further down via Slog.e and
    // contains the process, the reason and the CPU state, all essential for ANR analysis.
    StringBuilder info = new StringBuilder();
    info.setLength(0);
    info.append("ANR in ").append(app.processName);
    if (activity != null && activity.shortComponentName != null) {
        info.append(" (").append(activity.shortComponentName).append(")");
    }
    info.append("\n");
    info.append("PID: ").append(app.pid).append("\n");
    if (annotation != null) {
        info.append("Reason: ").append(annotation).append("\n");
    }
    if (parent != null && parent != activity) {
        info.append("Parent: ").append(parent.shortComponentName).append("\n");
    }
    // Dump stacks to the traces file: silent ANRs skip native stacks, others include them.
    ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(true);
    String[] nativeProcs = NATIVE_STACKS_OF_INTEREST;
    // don't dump native PIDs for background ANRs
    File tracesFile = null;
    if (isSilentANR) {
        // dumpStackTraces returns the traces file; its path comes from the
        // dalvik.vm.stack-trace-file property (/data/anr/traces.txt in these notes;
        // check with: adb shell getprop dalvik.vm.stack-trace-file).
        tracesFile = mService.dumpStackTraces(true, firstPids, null, lastPids,
                null);
    } else {
        tracesFile = mService.dumpStackTraces(true, firstPids, processCpuTracker, lastPids,
                nativeProcs);
    }
    // Refresh CPU statistics again and append them to the log output.
    String cpuInfo = null;
    if (ActivityManagerService.MONITOR_CPU_USAGE) {
        mService.updateCpuStatsNow();
        synchronized (mService.mProcessCpuTracker) {
            cpuInfo = mService.mProcessCpuTracker.printCurrentState(anrTime);
        }
        info.append(processCpuTracker.printCurrentLoad());
        info.append(cpuInfo);
    }
    info.append(processCpuTracker.printCurrentState(anrTime));
    Slog.e(TAG, info.toString());
    // Normally the traces were written to the traces file above. If no file could be
    // produced, fall back to sending Process.SIGNAL_QUIT (3) directly to the app.
    if (tracesFile == null) {
        Process.sendSignal(app.pid, Process.SIGNAL_QUIT);
    }
    // Store the traces file and CPU usage in the dropbox (data/system/dropbox).
    // Entry naming: system_server/system_app/data_app + type + ..., for example
    //   data_app_anr@1501989621992.txt.gz
    //   data_app_crash@1501989671926.txt
    mService.addErrorToDropBox("anr", app, app.processName, activity, parent, annotation,
            cpuInfo, tracesFile, null);
    synchronized (mService) {
        mService.mBatteryStatsService.noteProcessAnr(app.processName, app.uid);
        // A silent (background) ANR just kills the process and stops here.
        if (isSilentANR) {
            app.kill("bg anr", true);
            return;
        }
        // Put the app into the not-responding state (per the original notes this also
        // looks up the error report receiver).
        makeAppNotRespondingLocked(app,
                activity != null ? activity.shortComponentName : null,
                annotation != null ? "ANR " + annotation : "ANR",
                info.toString());
        // Ask for the ANR dialog to be shown.
        Message msg = Message.obtain();
        HashMap<String, Object> map = new HashMap<String, Object>();
        msg.what = ActivityManagerService.SHOW_NOT_RESPONDING_UI_MSG;
        msg.obj = map;
        msg.arg1 = aboveSystem ? 1 : 0;
        map.put("app", app);
        if (activity != null) {
            map.put("activity", activity);
        }
        // Post SHOW_NOT_RESPONDING_UI_MSG to the UI handler.
        mService.mUiHandler.sendMessage(msg);
    }
}
我们来小结一下上面发生了什么:
- 立刻更新了CPU的信息
/** 2721 cpu (total|1|6),(user|1|6),(system|1|6),(iowait|1|6),(irq|1|6),(softirq|1|6) */ public static final int CPU = 2721; 给event_log中写入值
- 忽略一些anr
- 在event_log中打印am_anr的信息,这个是anr立刻发生的记录
- 将ANR信息存在info变量中,后续打印到LOGCAT,这部分的信息会以ActivityManager为Tag打印出来,包含了ANR的进程,出现原因以及当时的CPU状态,这些对分析ANR是非常重要的信息
- 将ANR信息输出到data/anr/traces文件
- 没有输出到traces文件的时候,给底层发送一个Process.SIGNAL_QUIT=3信号
- 将traces文件 和 CPU使用率信息保存到dropbox,即data/system/dropbox目录
- 如果是后台ANR则直接杀掉结束
- 弹出ANR对话框
细节
怎么样就将信息保存到了/data/anr/traces.txt了
1.AMS.dumpStackTraces
/**
 * Prepares the ANR traces file and then dumps the requested stacks into it.
 *
 * @param clearTraces       when true, an existing traces file is deleted before being recreated
 * @param firstPids         passed through to the private overload
 * @param processCpuTracker passed through to the private overload
 * @param lastPids          passed through to the private overload
 * @param nativeProcs       passed through to the private overload
 * @return the traces file, or null when the path property is unset/empty or the file could
 *         not be prepared
 */
public static File dumpStackTraces(boolean clearTraces, ArrayList<Integer> firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray<Boolean> lastPids, String[] nativeProcs) {
    // The location comes from a system property ("data/anr/traces.txt" in these notes).
    final String path = SystemProperties.get("dalvik.vm.stack-trace-file", null);
    final boolean hasPath = path != null && path.length() != 0;
    if (!hasPath) {
        return null;
    }

    final File target = new File(path);
    try {
        if (clearTraces) {
            if (target.exists()) {
                target.delete();
            }
        }
        target.createNewFile();
        // 0666 == -rw-rw-rw-
        FileUtils.setPermissions(target.getPath(), 0666, -1, -1);
    } catch (IOException e) {
        Slog.w(TAG, "Unable to prepare ANR traces file: " + path, e);
        return null;
    }

    // [2] The private overload performs the actual dumping.
    dumpStackTraces(path, firstPids, processCpuTracker, lastPids, nativeProcs);
    return target;
}
2.
/**
 * Dumps stacks into {@code tracesPath} in three stages: the pids in {@code firstPids}, the
 * native processes named by {@code nativeProcs}, and up to five processes from
 * {@code lastPids} picked while walking the CPU tracker's working stats.
 *
 * @param tracesPath        path of the traces file being watched and written
 * @param firstPids         pids to signal first, may be null
 * @param processCpuTracker CPU tracker used to pick processes from lastPids, may be null
 * @param lastPids          candidate pids for the last stage; only read when
 *                          processCpuTracker is non-null
 * @param nativeProcs       command names of native processes to dump, may be null
 */
private static void dumpStackTraces(String tracesPath, ArrayList<Integer> firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray<Boolean> lastPids, String[] nativeProcs) {
    // Wakes up whoever is waiting on the observer once a CLOSE_WRITE event is reported
    // for the traces file.
    FileObserver observer = new FileObserver(tracesPath, FileObserver.CLOSE_WRITE) {
        @Override
        public synchronized void onEvent(int event, String path) { notify(); }
    };

    try {
        observer.startWatching();

        // Send Process.SIGNAL_QUIT (3) to every pid in firstPids, one at a time.
        if (firstPids != null) {
            try {
                int num = firstPids.size();
                for (int i = 0; i < num; i++) {
                    synchronized (observer) {
                        Process.sendSignal(firstPids.get(i), Process.SIGNAL_QUIT);
                        observer.wait(1000); // Wait for write-close, give up after 1 sec
                    }
                }
            } catch (InterruptedException e) {
                Slog.wtf(TAG, e);
            }
        }

        // Next, collect the stacks of the native processes.
        if (nativeProcs != null) {
            int[] pids = Process.getPidsForCommands(nativeProcs);
            if (pids != null) {
                for (int pid : pids) {
                    // [3] Dump the native backtrace into tracesPath; the last argument
                    // (10) is the timeout passed to the native side.
                    Debug.dumpNativeBacktraceToFileTimeout(pid, tracesPath, 10);
                }
            }
        }

        if (processCpuTracker != null) {
            processCpuTracker.init();
            System.gc();
            processCpuTracker.update();
            try {
                synchronized (processCpuTracker) {
                    processCpuTracker.wait(500); // measure over 1/2 second.
                }
            } catch (InterruptedException e) {
                // Ignored: an interrupt only shortens the sampling window.
            }
            processCpuTracker.update();

            // Walk the working stats (top CPU consumers according to the original notes)
            // and dump at most five processes that are also present in lastPids.
            final int N = processCpuTracker.countWorkingStats();
            int numProcs = 0;
            for (int i=0; i<N && numProcs<5; i++) {
                ProcessCpuTracker.Stats stats = processCpuTracker.getWorkingStats(i);
                if (lastPids.indexOfKey(stats.pid) >= 0) {
                    numProcs++;
                    try {
                        synchronized (observer) {
                            Process.sendSignal(stats.pid, Process.SIGNAL_QUIT);
                            observer.wait(1000); // Wait for write-close, give up after 1 sec
                        }
                    } catch (InterruptedException e) {
                        Slog.wtf(TAG, e);
                    }
                } else if (DEBUG_ANR) {
                    Slog.d(TAG, "Skipping next CPU consuming process, not a java proc: "
                            + stats.pid);
                }
            }
        }
    } finally {
        observer.stopWatching();
    }
}
小结:
收集发生anr进程的调用栈
- 发生anr的进程
- anr进程的父进程(anr进程是由于AMS生成,AMS在system_server进程中,system_server进程是anr的父进程)
- mLruProcesses中所有的persistent进程
收集Native进程的调用栈
"/system/bin/audioserver"
"/system/bin/cameraserver"
"/system/bin/drmserver"
"/system/bin/mediadrmserver"
"/system/bin/mediaserver"
"/system/bin/sdcard"
"/system/bin/surfaceflinger"
-
"media.codec"
// system/bin/mediacodec -
"media.extractor"
// system/bin/mediaextractor -
"com.android.bluetooth"
// Bluetooth service
收集lastPids进程的stacks
- 收集前五名
注意收集信息等待的时间
3.Debug.dumpNativeBacktraceToFileTimeout()
// JNI implementation behind Debug.dumpNativeBacktraceToFileTimeout(): opens `fileName` for
// appending and forwards the descriptor to dump_backtrace_to_file_timeout() for `pid`.
// Throws NullPointerException to Java when `fileName` is null; an open() failure is only
// reported on stderr.
static void android_os_Debug_dumpNativeBacktraceToFileTimeout(JNIEnv* env, jobject clazz,
    jint pid, jstring fileName, jint timeoutSecs)
{
    if (fileName == NULL) {
        jniThrowNullPointerException(env, "file == null");
        return;
    }

    // Convert the Java string's UTF-16 characters into a String8 path.
    String8 path8;
    const jchar* utf16 = env->GetStringCritical(fileName, 0);
    if (utf16) {
        const char16_t* chars = reinterpret_cast<const char16_t*>(utf16);
        path8 = String8(chars, env->GetStringLength(fileName));
        env->ReleaseStringCritical(fileName, utf16);
    }

    // Open the target file (data/anr/traces.txt in these notes) in append mode.
    const int openFlags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_CLOEXEC | O_APPEND;
    int fd = open(path8.string(), openFlags, 0666);
    if (fd < 0) {
        fprintf(stderr, "Can't open %s: %s\n", path8.string(), strerror(errno));
        return;
    }

    dump_backtrace_to_file_timeout(pid, fd, timeoutSecs);  // [4]
    close(fd);
}
4.dump_backtrace_to_file_timeout()
// Requests a backtrace dump for `tid` and copies the data that comes back into `fd`.
// Returns -1 when the dump request fails or a write to `fd` comes up short; otherwise
// returns `result` (0 unless the elided section below changes it).
int dump_backtrace_to_file_timeout(pid_t tid, int fd, int timeout_secs) {
// Send the dump request; on success the returned socket fd carries the dump output.
int sock_fd = make_dump_request(DEBUGGER_ACTION_DUMP_BACKTRACE, tid, timeout_secs);
if (sock_fd < 0) {
return -1;
}
int result = 0;
char buffer[1024];
ssize_t n;
// Set to 1 once any data has been read; NOTE(review): presumably consumed by the elided
// code after the loop.
int flag = 0;
// Copy everything read from sock_fd into fd (data/anr/traces.txt for the caller shown in
// these notes), stopping at the first short write.
while ((n = TEMP_FAILURE_RETRY(read(sock_fd, buffer, sizeof(buffer)))) > 0) {
flag = 1;
if (TEMP_FAILURE_RETRY(write(fd, buffer, n)) != n) {
result = -1;
break;
}
}
close(sock_fd);
...
return result;
}
主要是通过给底层发送DEBUGGER_ACTION_DUMP_BACKTRACE
来请求dump的sock_fd句柄,底层调用dump_backtraces()来获取信息,从而写入data/anr/traces.txt文件中
总结
当发生anr的时候,距离ANR最近的时间是am_anr这个日志的时间,随后会打印各种信息:既有底层dump出来的,也有各进程的调用栈信息等等。最后将traces.txt写入data/system/dropbox目录下,并且重命名,规则见上文。
补充
其中Process.sendSignal(stats.pid, Process.SIGNAL_QUIT);
向目标进程发送SIGQUIT信号(值为3)。在上面的流程中,这个信号的作用是让目标进程把自己的调用栈输出到traces文件(发送后会等待traces文件写入完成),而不是真的让进程退出。