美文网首页性能
Android之进程 crash监听

Android之进程 crash监听

作者: 锄禾豆 | 来源:发表于2022-02-06 22:16 被阅读0次

    前言:
    针对crash业务分析
    代码
    Android 10.0

    详细:

    一、日志分析

    1.异常弹框日志

        Line 4064: 11-05 23:17:54.358  1066  1066 E AndroidRuntime: java.lang.NullPointerException: Attempt to invoke virtual method 'boolean android.os.Handler.post(java.lang.Runnable)' on a null object reference
        Line 4065: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.example.myapplication.AutoCameraTestActivity.onClick(AutoCameraTestActivity.java:106)
        Line 4066: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.view.View.performClick(View.java:5637)
        Line 4067: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.view.View$PerformClick.run(View.java:22445)
        Line 4068: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Handler.handleCallback(Handler.java:755)
        Line 4069: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Handler.dispatchMessage(Handler.java:95)
        Line 4070: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Looper.loop(Looper.java:154)
        Line 4071: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.app.ActivityThread.main(ActivityThread.java:6141)
        Line 4072: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at java.lang.reflect.Method.invoke(Native Method)
        Line 4073: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.android.internal.os.ZygoteInit$MethodAndArgsCaller.run(ZygoteInit.java:912)
        Line 4074: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:802)
    

    2.进程启动加载的数据

    11-05 23:17:47.687  1149  1149 W System.err: java.lang.Exception: RuntimeInit
    11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.RuntimeInit.commonInit(RuntimeInit.java:122)
    11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.RuntimeInit.zygoteInit(RuntimeInit.java:288)
    11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteConnection.handleChildProc(ZygoteConnection.java:757)
    11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteConnection.runOnce(ZygoteConnection.java:243)
    11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteInit.runSelectLoop(ZygoteInit.java:876)
    11-05 23:17:47.688  1149  1149 W System.err:    at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:798)
    

    二、代码分析
    1.java层的异常弹框监听

    com.android.internal.os.RuntimeInit.java
    
        /**
         * Installs the uncaught-exception hooks for the process (excerpt; the remainder
         * of the method is elided in this listing).
         */
        private static final void commonInit() {
            // 1. Register the pre-handler, whose purpose is to log the exception. The
            //    setUncaughtExceptionPreHandler API is hidden.
            // 2. Register the default handler, whose purpose is to kill the app and hand
            //    the crash over to AMS.
            // Keeping 1 and 2 separate means an ordinary app that intercepts the exception
            // cannot stop the system from recording the crash log.
            LoggingHandler loggingHandler = new LoggingHandler();
            RuntimeHooks.setUncaughtExceptionPreHandler(loggingHandler);
            Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler));
            ···
        }
    
        /**
         * Pre-handler that logs the details of an uncaught exception.
         *
         * <p>Crashes in the system process and in ordinary apps are logged with different
         * headers, but both headers contain the text {@code FATAL EXCEPTION}. Per the
         * original note, the crash is written with log id {@code LOG_ID_CRASH}:
         * {@code Log.printlns(Log.LOG_ID_CRASH, Log.ERROR, tag, msg, tr)}.
         */
        private static class LoggingHandler implements Thread.UncaughtExceptionHandler {
            /** Becomes {@code true} as soon as this handler has been invoked. */
            public volatile boolean mTriggered = false;

            @Override
            public void uncaughtException(Thread t, Throwable e) {
                mTriggered = true;

                // KillApplicationHandler has already run; do not log again.
                if (mCrashing) {
                    return;
                }

                final boolean systemProcessCrash =
                        mApplicationObject == null && Process.myUid() == Process.SYSTEM_UID;
                if (systemProcessCrash) {
                    // No application object and running as the system uid.
                    Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
                    return;
                }

                // Ordinary app crash: the header carries thread name, process name and pid.
                final StringBuilder header = new StringBuilder("FATAL EXCEPTION: ");
                header.append(t.getName()).append("\n");
                final String processName = ActivityThread.currentProcessName();
                if (processName != null) {
                    header.append("Process: ").append(processName).append(", ");
                }
                header.append("PID: ").append(Process.myPid());
                Clog_e(TAG, header.toString(), e);
            }
        }
        
        //通知ams处理异常业务
        private static class KillApplicationHandler implements Thread.UncaughtExceptionHandler {
            private final LoggingHandler mLoggingHandler;
    
            public KillApplicationHandler(LoggingHandler loggingHandler) {
                this.mLoggingHandler = Objects.requireNonNull(loggingHandler);
            }
    
            @Override
            public void uncaughtException(Thread t, Throwable e) {
                try {
                    //如果日志在预处理之前没有抓取到,则再抓取一次
                    ensureLogging(t, e);
    
                    if (mCrashing) return;
                    mCrashing = true;
    
                    if (ActivityThread.currentActivityThread() != null) {
                        ActivityThread.currentActivityThread().stopProfiling();
                    }
                    //通知ams处理异常业务,例如通知dropbox记录异常信息、弹异常对话框等等
                    ActivityManager.getService().handleApplicationCrash(
                            mApplicationObject, new ApplicationErrorReport.ParcelableCrashInfo(e));
                } catch (Throwable t2) {
                    if (t2 instanceof DeadObjectException) {
                        // System process is dead; ignore
                    } else {
                        try {
                            Clog_e(TAG, "Error reporting crash", t2);
                        } catch (Throwable t3) {
                            // Even Clog_e() fails!  Oh well.
                        }
                    }
                } finally {
                    //通过kill -9杀应用
                    Process.killProcess(Process.myPid());
                    System.exit(10);
                }
            }
    
            private void ensureLogging(Thread t, Throwable e) {
                if (!mLoggingHandler.mTriggered) {
                    try {
                        mLoggingHandler.uncaughtException(t, e);
                    } catch (Throwable loggingThrowable) {
                        // Ignored.
                    }
                }
            }
        }
    

    总结:
    1)Zygote进程fork app进程时,调用RuntimeInit.zygoteInit,进而在RuntimeInit.commonInit中设置监听Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler))。这是在启动进程时,进程内部创建的java层异常监听
    2)ActivityManagerService.handleApplicationCrash就是展示对话框的业务入口函数。
    3)异常TAG搜索:FATAL EXCEPTION
    4)这种方式无法监听jni异常
    5)特别说明,
    设置预处理异常业务,目的输出异常日志。此接口setUncaughtExceptionPreHandler为hide
    设置默认异常处理业务,目的是kill应用和定制ams控制业务
    将两者分开,可以避免普通应用把异常日志捕获而系统无法留档

    2.Native层的异常弹框处理

    com.android.server.SystemServer
    /**
     * Excerpt of the SystemServer startup sequence; only the call relevant to
     * native-crash monitoring is shown, the rest is elided.
     */
    private void startOtherServices() {
       ···
       // Ask the activity manager service to begin observing native crashes.
       mActivityManagerService.startObservingNativeCrashes();
       ···
    }
    
    com.android.server.am.ActivityManagerService
    /** Creates a {@link NativeCrashListener} bound to this service and starts its thread. */
    public void startObservingNativeCrashes() {
        new NativeCrashListener(this).start();
    }
    
    
    com.android.server.am.NativeCrashListener
    /**
     * Thread that listens on a UNIX-domain stream socket at
     * {@link #DEBUGGERD_SOCKET_PATH}, accepts connections, and for peers whose uid is 0
     * reads a native crash report and forwards it to the activity manager through
     * {@link NativeCrashReporter}. Parts of the class are elided in this listing.
     */
    final class NativeCrashListener extends Thread {
        ···
        // Filesystem path of the socket this listener binds to.
        static final String DEBUGGERD_SOCKET_PATH = "/data/system/ndebugsocket";
        ···
        /**
         * Binds the server socket and then loops forever accepting connections.
         * Errors on a single connection are logged and the loop continues; a failure
         * while setting up the socket is logged and ends the method.
         */
        public void run() {
            // Single byte written back to every accepted peer in the finally block below.
            final byte[] ackSignal = new byte[1];
            ···
            try {
                // Create the server side of the socket.
                FileDescriptor serverFd = Os.socket(AF_UNIX, SOCK_STREAM, 0);
                final UnixSocketAddress sockAddr = UnixSocketAddress.createFileSystem(
                        DEBUGGERD_SOCKET_PATH);
                Os.bind(serverFd, sockAddr);
                Os.listen(serverFd, 1);
                // Set the socket file's mode to 0777.
                Os.chmod(DEBUGGERD_SOCKET_PATH, 0777);
    
                while (true) {
                    FileDescriptor peerFd = null;
                    try {
                        // Wait for a client to connect.
                        peerFd = Os.accept(serverFd, null /* peerAddress */);
                        if (peerFd != null) {
                            StructUcred credentials =
                                    Os.getsockoptUcred(peerFd, SOL_SOCKET, SO_PEERCRED);
                            // Crash data is only processed when the peer's uid is 0.
                            if (credentials.uid == 0) {
                                // Consume the native crash information.
                                consumeNativeCrashData(peerFd);
                            }
                        }
                    } catch (Exception e) {
                        Slog.w(TAG, "Error handling connection", e);
                    } finally {
                        // Whenever a peer was accepted, send the ack byte and close the
                        // descriptor, regardless of whether the data was consumed.
                        if (peerFd != null) {
                            try {
                                Os.write(peerFd, ackSignal, 0, 1);
                            } catch (Exception e) {
                                // Failure to send the ack is ignored.
                            }
                            try {
                                Os.close(peerFd);
                            } catch (ErrnoException e) {
                                // Failure to close the descriptor is ignored.
                            }
                        }
                    }
                }
            } catch (Exception e) {
                Slog.e(TAG, "Unable to init native debug socket!", e);
            }
        }
        
        /**
         * Reads one crash report from {@code fd}: an 8-byte header holding pid and
         * signal, followed by the dump text. If a non-persistent ProcessRecord exists
         * for the pid, the process is marked as crashing and a NativeCrashReporter
         * thread is started with the text. All exceptions are caught and logged.
         *
         * @param fd descriptor of the accepted peer connection
         */
        void consumeNativeCrashData(FileDescriptor fd) {
            final byte[] buf = new byte[4096];
            final ByteArrayOutputStream os = new ByteArrayOutputStream(4096);
    
            try {
                // Apply SOCKET_TIMEOUT_MILLIS as both the receive and the send timeout.
                StructTimeval timeout = StructTimeval.fromMillis(SOCKET_TIMEOUT_MILLIS);
                Os.setsockoptTimeval(fd, SOL_SOCKET, SO_RCVTIMEO, timeout);
                Os.setsockoptTimeval(fd, SOL_SOCKET, SO_SNDTIMEO, timeout);
    
                // Read the 8-byte header from fd into buf; give up silently if it is short.
                int headerBytes = readExactly(fd, buf, 0, 8);
                if (headerBytes != 8) {
                    return;
                }
    
                // Decode pid (offset 0) and signal (offset 4) from buf.
                int pid = unpackInt(buf, 0);
                int signal = unpackInt(buf, 4);
    
                // now the text of the dump
                if (pid > 0) {
                    final ProcessRecord pr;
                    synchronized (mAm.mPidsSelfLocked) {
                        pr = mAm.mPidsSelfLocked.get(pid);
                    }
                    if (pr != null) {
                        // Persistent processes are not reported.
                        if (pr.isPersistent()) {
                            return;
                        }
    
                        // Accumulate the dump text into os until a trailing 0 byte is
                        // seen or read() returns no more data.
                        int bytes;
                        do {
                            // get some data
                            bytes = Os.read(fd, buf, 0, buf.length);
                            if (bytes > 0) {
                                if (buf[bytes-1] == 0) {
                                    os.write(buf, 0, bytes-1);  // exclude the EOD token
                                    break;
                                }
                                // no EOD, so collect it and read more
                                os.write(buf, 0, bytes);
                            }
                        } while (bytes > 0);
                        
    
                        synchronized (mAm) {
                            pr.setCrashing(true);
                            pr.forceCrashReport = true;
                        }
                        // Decode the collected bytes as a UTF-8 string.
                        final String reportString = new String(os.toByteArray(), "UTF-8");
                        // Report the crash on a separate thread.
                        (new NativeCrashReporter(pr, signal, reportString)).start();
                    } else {
                        Slog.w(TAG, "Couldn't find ProcessRecord for pid " + pid);
                    }
                } else {
                    Slog.e(TAG, "Bogus pid!");
                }
            } catch (Exception e) {
                Slog.e(TAG, "Exception dealing with report", e);
                // ugh, fail.
            }
        }
        
        /**
         * Thread that turns a received native crash (process, signal, dump text) into a
         * CrashInfo and passes it to the activity manager.
         */
        class NativeCrashReporter extends Thread {
            ProcessRecord mApp;
            int mSignal;
            String mCrashReport;
    
            NativeCrashReporter(ProcessRecord app, int signal, String report) {
                super("NativeCrashReport");
                mApp = app;
                mSignal = signal;
                mCrashReport = report;
            }
    
            @Override
            public void run() {
                try {
                    // Native crashes have no Java throw site, so those fields are
                    // filled with "unknown"; the dump text becomes the stack trace.
                    CrashInfo ci = new CrashInfo();
                    ci.exceptionClassName = "Native crash";
                    ci.exceptionMessage = Os.strsignal(mSignal);
                    ci.throwFileName = "unknown";
                    ci.throwClassName = "unknown";
                    ci.throwMethodName = "unknown";
                    ci.stackTrace = mCrashReport;
                    // Notify AMS.
                    mAm.handleApplicationCrashInner("native_crash", mApp, mApp.processName, ci);
                } catch (Exception e) {
                    Slog.e(TAG, "Unable to report native crash", e);
                }
            }
        }
    }
    

    总结
    1)native监听实现是在线程中开启了一个while循环
    2)注意,对于persistent进程,不做crash report
    3)ams设置的native监听,是作为socket服务端,而客户端来自debuggerd进程
    4)ams接收异常后,调用handleApplicationCrashInner

    3.Ams.handleApplicationCrashInner分析
    不管是java层还是native层的crash,最终都会通知Ams.handleApplicationCrashInner
    1)分析handleApplicationCrashInner

    ActivityManagerService
    
        /**
         * Common crash entry point (excerpt; part of the body is elided): writes an
         * AM_CRASH event, adds the crash to DropBox, and passes it to AppErrors.
         *
         * @param eventType   crash category passed on to addErrorToDropBox
         * @param r           record of the crashed process; may be null (see the null check below)
         * @param processName name of the crashed process
         * @param crashInfo   details of the crash
         */
        void handleApplicationCrashInner(String eventType, ProcessRecord r, String processName,
                ApplicationErrorReport.CrashInfo crashInfo) {
            // Event-log entry: EventLogTags.AM_CRASH --> am_crash
            EventLog.writeEvent(EventLogTags.AM_CRASH, Binder.getCallingPid(),
                    UserHandle.getUserId(Binder.getCallingUid()), processName,
                    r == null ? -1 : r.info.flags,
                    crashInfo.exceptionClassName,
                    crashInfo.exceptionMessage,
                    crashInfo.throwFileName,
                    crashInfo.throwLineNumber);
            ···
            // Add the crash information to DropBox.
            addErrorToDropBox(
                    eventType, r, processName, null, null, null, null, null, null, crashInfo);
            // Application crash reporting.
            mAppErrors.crashApplication(r, crashInfo);
        }
    

    a)分析crash异常时,也可以关注events日志tag:am_crash
    b)随后决定是直接重启app,还是弹出对话框等待用户手动点击确认
    c)dropbox也会记录异常信息,前缀名称system_server/system_app/data_app。即/data/system/dropbox

        /**
         * Returns the class label for a process: {@code "system_server"},
         * {@code "system_app"} or {@code "data_app"}.
         *
         * @param process the process to classify; null is treated as system_server
         */
        private static String processClass(ProcessRecord process) {
            // No record, or the pid equals MY_PID: the system_server process.
            final boolean isSystemServer = process == null || process.pid == MY_PID;
            if (isSystemServer) {
                return "system_server";
            }
            // FLAG_SYSTEM set: per the article, apps shipped under system/app or priv-app;
            // everything else is an installed (data) app.
            final boolean isSystemApp =
                    (process.info.flags & ApplicationInfo.FLAG_SYSTEM) != 0;
            return isSystemApp ? "system_app" : "data_app";
        }
    

    2)mAppErrors.crashApplication(r, crashInfo);

    a)针对persistent或apexmodule进程,进行营救记录
    b)过滤不弹对话框业务条件
    c)通知handler处理对话框业务
    d)等待handler并处理相关结果
    com.android.server.am.AppErrors
        /**
         * Handles an application crash (excerpt; several parts are elided): notes the
         * crash for rescue/watchdog purposes, posts SHOW_ERROR_UI_MSG to the UI handler,
         * and then blocks until a result is available.
         */
        void crashApplicationInner(ProcessRecord r, ApplicationErrorReport.CrashInfo crashInfo,
                int callingPid, int callingUid) {
            ···
            // For persistent processes or APEX modules, note the crash with RescueParty.
            if (r != null) {
                ···
                if (r.isPersistent() || isApexModule) {
                    RescueParty.noteAppCrash(mContext, r.uid);
                }
    
                mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode());
            }
    
            ···
            synchronized (mService) {
                // (elided) Conditions under which no dialog is shown are filtered here.
                // Per the article, implementing IActivityController (registered via
                // ActivityTaskManagerService.setActivityController) is one way to suppress
                // the dialog.
                ···
                // Ask the UI handler to process the crash dialog.
                final Message msg = Message.obtain();
                msg.what = ActivityManagerService.SHOW_ERROR_UI_MSG;
    
                taskId = data.taskId;
                msg.obj = data;
                mService.mUiHandler.sendMessage(msg);
            }
    
            // Wait for the handler and then act on its result.
            int res = result.get();// blocks
            ···
            
        }
    

    3)通知handler处理对话框业务
    主要处理是否展示对话框业务

    com.android.server.am.AppErrors
        /**
         * Decides whether a crash dialog is shown for the data carried in {@code msg}
         * and shows it if so (excerpt; several parts are elided).
         *
         * @param msg message whose {@code obj} is an AppErrorDialog.Data
         */
        void handleShowAppErrorUi(Message msg) {
            AppErrorDialog.Data data = (AppErrorDialog.Data) msg.obj;
            // Reads Settings.Secure.ANR_SHOW_BACKGROUND; the fallback value is 0, so
            // showBackground is false unless the setting is non-zero.
            boolean showBackground = Settings.Secure.getInt(mContext.getContentResolver(),
                    Settings.Secure.ANR_SHOW_BACKGROUND, 0) != 0;
    
            AppErrorDialog dialogToShow = null;
            ···
            synchronized (mService) {
                ···
                final boolean showFirstCrashDevOption = Settings.Secure.getIntForUser(
                        mContext.getContentResolver(),
                        Settings.Secure.SHOW_FIRST_CRASH_DIALOG_DEV_OPTION,
                        0,
                        mService.mUserController.getCurrentUserId()) != 0;
                // Silent-crash allowlist. Per the article it comes from
                // config_appsNotReportingCrashes in framework-res.apk, with entries
                // separated by ",", e.g. com.android.settings,com.android.systemui
                final boolean crashSilenced = mAppsNotReportingCrashes != null &&
                        mAppsNotReportingCrashes.contains(proc.info.packageName);
                
                // Per the article: the dialog is shown by default, and setting
                // Settings.Global.HIDE_ERROR_DIALOGS to 1 hides it.
                if ((mService.mAtmInternal.canShowErrorDialogs() || showBackground)
                        && !crashSilenced
                        && (showFirstCrashDevOption || data.repeating)) {
                    proc.crashDialog = dialogToShow = new AppErrorDialog(mContext, mService, data);
                    ···
                } else {
                    // The device is asleep, so just pretend that the user
                    // saw a crash dialog and hit "force quit".
                    if (res != null) {
                        res.set(AppErrorDialog.CANT_SHOW);
                    }
                }
            }
            
            // The dialog is shown after the synchronized block on mService has exited.
            if (dialogToShow != null) {
                Slog.i(TAG, "Showing crash dialog for package " + packageName + " u" + userId);
                dialogToShow.show();
            }
        }
    

    针对异常对话框展示问题
    a)局部:将目标包名加入framework-res.apk的config_appsNotReportingCrashes白名单
    b)全局,设置Settings.Global.HIDE_ERROR_DIALOGS为1,Settings.Secure.ANR_SHOW_BACKGROUND为0

    4)怎么实现等待handler处理?

    com.android.server.am.AppErrorResult
    
    /**
     * Hand-off of a single int result between threads: one thread publishes the value
     * with {@link #set(int)}, another blocks in {@link #get()} until it is available.
     * Both methods synchronize on this instance.
     */
    final class AppErrorResult {
        /**
         * Stores the result and wakes every thread waiting in {@link #get()}.
         *
         * @param res the result value to publish
         */
        public void set(int res) {
            synchronized (this) {
                mHasResult = true;
                mResult = res;
                notifyAll();
            }
        }
    
        /**
         * Blocks until a result has been published and returns it.
         *
         * <p>An interrupt does not abort the wait. If the thread is interrupted while
         * waiting, the interrupt is remembered and the thread's interrupt status is
         * restored before returning, so the caller can still observe it.
         *
         * @return the value passed to {@link #set(int)}
         */
        public int get() {
            boolean interrupted = false;
            try {
                synchronized (this) {
                    while (!mHasResult) {
                        try {
                            wait();
                        } catch (InterruptedException e) {
                            // Keep waiting for the result. Re-interrupting here would make
                            // the next wait() throw immediately, so defer it to finally.
                            interrupted = true;
                        }
                    }
                    // Read under the same lock that set() writes under.
                    return mResult;
                }
            } finally {
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    
        // Guarded by synchronized (this).
        boolean mHasResult = false;
        int mResult;
    }
    

    参考学习

    https://juejin.cn/post/6844904006041468935
    

    相关文章

      网友评论

        本文标题:Android之进程 crash监听

        本文链接:https://www.haomeiwen.com/subject/fwwqkrtx.html