美文网首页android之基础学习攻克
tombstone与debuggerd相关流程

tombstone与debuggerd相关流程

作者: weiinter105 | 来源:发表于2018-11-18 23:49 被阅读0次

    tombstone的抓取与debuggerd有关。debuggerd是一个守护进程,用来检测程序的崩溃,将程序崩溃前进程的状态记录下来,保存在/data/tombstones文件夹下,最多保留10个;本质上是对程序崩溃时某些信号的拦截

    相关流程

    客户端流程

    首先,Android程序的入口有一个linker的操作,大致流程如下:

    bionic/linker/arch/arm64/begin.S
    31ENTRY(_start)
    32  mov x0, sp
    33  bl __linker_init
    34
    35  /* linker init returns the _entry address in the main image */
    36  br x0
    37END(_start)
    
    
    bionic/linker/linker.cpp
    4442/*
    4443 * This is the entry point for the linker, called from begin.S. This
    4444 * method is responsible for fixing the linker's own relocations, and
    4445 * then calling __linker_init_post_relocation().
    4446 *
    4447 * Because this method is called before the linker has fixed it's own
    4448 * relocations, any attempt to reference an extern variable, extern
    4449 * function, or other GOT reference will generate a segfault.
    4450 */
    4451extern "C" ElfW(Addr) __linker_init(void* raw_args) {
              ...
    4522  // We have successfully fixed our own relocations. It's safe to run
    4523  // the main part of the linker now.
    4524  args.abort_message_ptr = &g_abort_message;
    4525  ElfW(Addr) start_address = __linker_init_post_relocation(args, linker_addr);
    4526
    4527  INFO("[ Jumping to _start (%p)... ]", reinterpret_cast<void*>(start_address));
    4528
    4529  // Return the address that the calling assembly stub should jump to.
    4530  return start_address;
    4531}
    
    4195/*
    4196 * This code is called after the linker has linked itself and
    4197 * fixed it's own GOT. It is safe to make references to externs
    4198 * and other non-local data at this point.
    4199 */
    4200static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {
    4201#if TIMING
    4202  struct timeval t0, t1;
    4203  gettimeofday(&t0, 0);
    4204#endif
    4205
    4206  // Sanitize the environment.
    4207  __libc_init_AT_SECURE(args);
    4208
    4209  // Initialize system properties
    4210  __system_properties_init(); // may use 'environ'
    4211
    4212  debuggerd_init();
    4213
    4214  // Get a few environment variables.
    4215  const char* LD_DEBUG = getenv("LD_DEBUG");
    4216  if (LD_DEBUG != nullptr) {
    4217    g_ld_debug_verbosity = atoi(LD_DEBUG);
    4218  }
               ...
    4412}
    
    bionic/linker/debugger.cpp
    302__LIBC_HIDDEN__ void debuggerd_init() {
    303  struct sigaction action;
    304  memset(&action, 0, sizeof(action));
    305  sigemptyset(&action.sa_mask);
    306  action.sa_sigaction = debuggerd_signal_handler;
    307  action.sa_flags = SA_RESTART | SA_SIGINFO;
    308
    309  // Use the alternate signal stack if available so we can catch stack overflows.
    310  action.sa_flags |= SA_ONSTACK;
    311
    312  sigaction(SIGABRT, &action, nullptr);
    313  sigaction(SIGBUS, &action, nullptr);
    314  sigaction(SIGFPE, &action, nullptr);
    315  sigaction(SIGILL, &action, nullptr);
    316  sigaction(SIGSEGV, &action, nullptr);
    317#if defined(SIGSTKFLT)
    318  sigaction(SIGSTKFLT, &action, nullptr);
    319#endif
    320  sigaction(SIGTRAP, &action, nullptr);
    321}
    

    为上面这几个信号注册信号处理函数,也就是说只有这几个信号会生成tombstone

    SIGILL(非法指令异常)

    SIGABRT(abort退出异常)

    SIGBUS(硬件访问异常)

    SIGFPE(浮点运算异常)

    SIGSEGV(内存访问异常)

    SIGSTKFLT(协处理器栈异常)

    SIGTRAP(断点/跟踪陷阱异常,通常由调试断点或__builtin_trap()之类的陷阱指令触发,平时不太常见)

    信号处理函数为:

    258/*
    259 * Catches fatal signals so we can ask debuggerd to ptrace us before
    260 * we crash.
    261 */
    262static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void*) {
    263  // It's possible somebody cleared the SA_SIGINFO flag, which would mean
    264  // our "info" arg holds an undefined value.
    265  if (!have_siginfo(signal_number)) {
    266    info = nullptr;
    267  }
    268
    269  log_signal_summary(signal_number, info);
    270
    271  send_debuggerd_packet(info); //发送请求 第一次接受到信号是向debuggerd服务端发送请求,等待回应表示链接上了
    272
    273  // We need to return from the signal handler so that debuggerd can dump the
    274  // thread that crashed, but returning here does not guarantee that the signal
    275  // will be thrown again, even for SIGSEGV and friends, since the signal could
    276  // have been sent manually. Resend the signal with rt_tgsigqueueinfo(2) to
    277  // preserve the SA_SIGINFO contents.
    278  signal(signal_number, SIG_DFL); //将信号处理函数置空
    279
    280  struct siginfo si;
    281  if (!info) {
    282    memset(&si, 0, sizeof(si));
    283    si.si_code = SI_USER;
    284    si.si_pid = getpid();
    285    si.si_uid = getuid();
    286    info = &si;
    287  } else if (info->si_code >= 0 || info->si_code == SI_TKILL) {
    288    // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels
    289    // that contain commit 66dd34a (3.9+). The manpage claims to only allow
    290    // negative si_code values that are not SI_TKILL, but 66dd34a changed the
    291    // check to allow all si_code values in calls coming from inside the house.
    292  }
    293
    294  int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), gettid(), signal_number, info); //给自己的相关线程再发送一次信号
    295  if (rc != 0) {
    296    __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to resend signal during crash: %s",
    297                      strerror(errno));
    298    _exit(0);
    299  }
    300}
    

    客户端向debuggerd发送信息,并等待回应,通过socket的write & read完成

    208static void send_debuggerd_packet(siginfo_t* info) {
    209  // Mutex to prevent multiple crashing threads from trying to talk
    210  // to debuggerd at the same time.
    211  static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER;
    212  int ret = pthread_mutex_trylock(&crash_mutex);
    213  if (ret != 0) {
    214    if (ret == EBUSY) {
    215      __libc_format_log(ANDROID_LOG_INFO, "libc",
    216          "Another thread contacted debuggerd first; not contacting debuggerd.");
    217      // This will never complete since the lock is never released.
    218      pthread_mutex_lock(&crash_mutex);
    219    } else {
    220      __libc_format_log(ANDROID_LOG_INFO, "libc",
    221                        "pthread_mutex_trylock failed: %s", strerror(ret));
    222    }
    223    return;
    224  }
    225
    226  int s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM | SOCK_CLOEXEC);
    227  if (s == -1) {
    228    __libc_format_log(ANDROID_LOG_FATAL, "libc", "Unable to open connection to debuggerd: %s",
    229                      strerror(errno));
    230    return;
    231  }
    232
    233  // debuggerd knows our pid from the credentials on the
    234  // local socket but we need to tell it the tid of the crashing thread.
    235  // debuggerd will be paranoid and verify that we sent a tid
    236  // that's actually in our process.
    237  debugger_msg_t msg;
    238  msg.action = DEBUGGER_ACTION_CRASH;
    239  msg.tid = gettid();
    240  msg.abort_msg_address = reinterpret_cast<uintptr_t>(g_abort_message);
    241  msg.original_si_code = (info != nullptr) ? info->si_code : 0;
    242  ret = TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg)));
    243  if (ret == sizeof(msg)) {
    244    char debuggerd_ack;
    245    ret = TEMP_FAILURE_RETRY(read(s, &debuggerd_ack, 1));
    246    int saved_errno = errno;
    247    notify_gdb_of_libraries();
    248    errno = saved_errno;
    249  } else {
    250    // read or write failed -- broken connection?
    251    __libc_format_log(ANDROID_LOG_FATAL, "libc", "Failed while talking to debuggerd: %s",
    252                      strerror(errno));
    253  }
    254
    255  close(s);
    256}
    

    debuggerd服务端启动,dump流程

    debuggerd除了作为守护进程运行之外,也可以带参数运行(例如 debuggerd -b <tid>),用来显式dump指定的线程,这种方式我们暂且不讨论;这里只说正常的启动模式,即不带参数(argc == 1)时进入do_server(),作为服务端运行

    941int main(int argc, char** argv) {
    942  union selinux_callback cb;
    943  if (argc == 1) {
    944    cb.func_audit = audit_callback;
    945    selinux_set_callback(SELINUX_CB_AUDIT, cb);
    946    cb.func_log = selinux_log_callback;
    947    selinux_set_callback(SELINUX_CB_LOG, cb);
    948    return do_server();
    949  }
    950
    951  bool dump_backtrace = false;
    952  bool have_tid = false;
    953  pid_t tid = 0;
    954  for (int i = 1; i < argc; i++) {
    955    if (!strcmp(argv[i], "-b")) {
    956      dump_backtrace = true;
    957    } else if (!have_tid) {
    958      tid = atoi(argv[i]);
    959      have_tid = true;
    960    } else {
    961      usage();
    962      return 1;
    963    }
    964  }
    965  if (!have_tid) {
    966    usage();
    967    return 1;
    968  }
    969  return do_explicit_dump(tid, dump_backtrace);
    970}
    

    启动一个debuggerd服务端

    849static int do_server() {
    850  // debuggerd crashes can't be reported to debuggerd.
    851  // Reset all of the crash handlers.
    852  signal(SIGABRT, SIG_DFL);
    853  signal(SIGBUS, SIG_DFL);
    854  signal(SIGFPE, SIG_DFL);
    855  signal(SIGILL, SIG_DFL);
    856  signal(SIGSEGV, SIG_DFL);
    857#ifdef SIGSTKFLT
    858  signal(SIGSTKFLT, SIG_DFL);
    859#endif
    860  signal(SIGTRAP, SIG_DFL);
    861
    862  // Ignore failed writes to closed sockets
    863  signal(SIGPIPE, SIG_IGN); //忽略SIGPIPE,避免向已关闭的socket写入时debuggerd被终止(上面几行SIG_DFL才是让debuggerd自身的crash不再上报给debuggerd)
    864
    865  // Block SIGCHLD so we can sigtimedwait for it.
    866  sigset_t sigchld;
    867  sigemptyset(&sigchld);
    868  sigaddset(&sigchld, SIGCHLD);
    869  sigprocmask(SIG_SETMASK, &sigchld, nullptr);
    870
    871  int s = socket_local_server(SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT,
    872                              SOCK_STREAM | SOCK_CLOEXEC); //创建一个服务端,等待客户端连接
    873  if (s == -1) return 1;
    874
    875  typedef void (*NativeDebugInit)(void);
    876  static NativeDebugInit s_func_ptr = NULL;
    877  if(!s_func_ptr) {
    878    void* handle = dlopen("libmiuindbg.so",RTLD_NOW);
    879    if(handle) {
    880      s_func_ptr = (NativeDebugInit)dlsym(handle,"hook_context_do_hook");
    881    }
    882  }
    883
    884  if(s_func_ptr) {
    885    s_func_ptr();
    886  }
    887
    888  // Fork a process that stays root, and listens on a pipe to pause and resume the target.
    889  if (!start_signal_sender()) {
    890    ALOGE("debuggerd: failed to fork signal sender");
    891    return 1;
    892  }
    893
    894  ALOGI("debuggerd: starting\n");
    895
    896  for (;;) {
    897    sockaddr_storage ss;
    898    sockaddr* addrp = reinterpret_cast<sockaddr*>(&ss);
    899    socklen_t alen = sizeof(ss);
    900
    901    ALOGV("waiting for connection\n");
    902    int fd = accept4(s, addrp, &alen, SOCK_CLOEXEC);
    903    if (fd == -1) {
    904      ALOGE("accept failed: %s\n", strerror(errno));
    905      continue;
    906    }
    907
    908    handle_request(fd); //处理客户端的请求
    909  }
    910  return 0;
    911}
    

    处理客户端发来的请求

    808static void handle_request(int fd) {
    809  ALOGV("handle_request(%d)\n", fd);
    810
    811  ScopedFd closer(fd);
    812  debugger_request_t request;
    813  memset(&request, 0, sizeof(request));
    814  int status = read_request(fd, &request); //读取客户端的请求
    815  if (status != 0) {
    816    return;
    817  }
    818
    819  ALOGW("debuggerd: handling request: pid=%d uid=%d gid=%d tid=%d\n", request.pid, request.uid,
    820        request.gid, request.tid);
    821
    822#if defined(__LP64__)
    823  // On 64 bit systems, requests to dump 32 bit and 64 bit tids come
    824  // to the 64 bit debuggerd. If the process is a 32 bit executable,
    825  // redirect the request to the 32 bit debuggerd.
    826  if (is32bit(request.tid)) {
    827    // Only dump backtrace and dump tombstone requests can be redirected.
    828    if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE ||
    829        request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
    830      redirect_to_32(fd, &request);
    831    } else {
    832      ALOGE("debuggerd: Not allowed to redirect action %d to 32 bit debuggerd\n", request.action);
    833    }
    834    return;
    835  }
    836#endif
    837
    838  // Fork a child to handle the rest of the request.
    839  pid_t fork_pid = fork();
    840  if (fork_pid == -1) {
    841    ALOGE("debuggerd: failed to fork: %s\n", strerror(errno));
    842  } else if (fork_pid == 0) {
    843    worker_process(fd, request); //处理request
    844  } else {
    845    monitor_worker_process(fork_pid, request);
    846  }
    847}
    

    read客户端发来的信息

    197static int read_request(int fd, debugger_request_t* out_request) {
    198  ucred cr;
    199  socklen_t len = sizeof(cr);
    200  int status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
    201  if (status != 0) {
    202    ALOGE("cannot get credentials");
    203    return -1;
    204  }
    205
    206  ALOGV("reading tid");
    207  fcntl(fd, F_SETFL, O_NONBLOCK);
    208
    209  pollfd pollfds[1];
    210  pollfds[0].fd = fd;
    211  pollfds[0].events = POLLIN;
    212  pollfds[0].revents = 0;
    213  status = TEMP_FAILURE_RETRY(poll(pollfds, 1, 3000)); //轮询fd句柄
    214  if (status != 1) {
    215    ALOGE("timed out reading tid (from pid=%d uid=%d)\n", cr.pid, cr.uid);
    216    return -1;
    217  }
    218
    219  debugger_msg_t msg;
    220  memset(&msg, 0, sizeof(msg));
    221  status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg))); //读取客户端信息
    222  if (status < 0) {
    223    ALOGE("read failure? %s (pid=%d uid=%d)\n", strerror(errno), cr.pid, cr.uid);
    224    return -1;
    225  }
    226  if (status != sizeof(debugger_msg_t)) {
    227    ALOGE("invalid crash request of size %d (from pid=%d uid=%d)\n", status, cr.pid, cr.uid);
    228    return -1;
    229  }
    230
    231  out_request->action = static_cast<debugger_action_t>(msg.action);
    232  out_request->tid = msg.tid;
    233  out_request->pid = cr.pid;
    234  out_request->uid = cr.uid;
    235  out_request->gid = cr.gid;
    236  out_request->abort_msg_address = msg.abort_msg_address;
    237  out_request->original_si_code = msg.original_si_code;
    238
    239  if (msg.action == DEBUGGER_ACTION_CRASH) {
    240    // Ensure that the tid reported by the crashing process is valid.
    241    // This check needs to happen again after ptracing the requested thread to prevent a race.
    242    if (!pid_contains_tid(out_request->pid, out_request->tid)) {
    243      ALOGE("tid %d does not exist in pid %d. ignoring debug request\n", out_request->tid,
    244            out_request->pid);
    245      return -1;
    246    }
    247  } else if (cr.uid == 0 || (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) {
    248    // Only root or system can ask us to attach to any process and dump it explicitly.
    249    // However, system is only allowed to collect backtraces but cannot dump tombstones.
    250    status = get_process_info(out_request->tid, &out_request->pid,
    251                              &out_request->uid, &out_request->gid);
    252    if (status < 0) {
    253      ALOGE("tid %d does not exist. ignoring explicit dump request\n", out_request->tid);
    254      return -1;
    255    }
    256
    257    if (!selinux_action_allowed(fd, out_request))
    258      return -1;
    259  } else {
    260    // No one else is allowed to dump arbitrary processes.
    261    return -1;
    262  }
    263  return 0;
    264}
    

    整体的dump流程

    566static void worker_process(int fd, debugger_request_t& request) {
    567  // Open the tombstone file if we need it.
    568  std::string tombstone_path;
    569  int tombstone_fd = -1;
    570  switch (request.action) {
    571    case DEBUGGER_ACTION_DUMP_TOMBSTONE:
    572    case DEBUGGER_ACTION_CRASH:
    573      tombstone_fd = open_tombstone(&tombstone_path); 
    574      if (tombstone_fd == -1) {
    575        ALOGE("debuggerd: failed to open tombstone file: %s\n", strerror(errno));
    576        exit(1);
    577      }
    578      break;
    579
    580    case DEBUGGER_ACTION_DUMP_BACKTRACE:
    581      break;
    582
    583    default:
    584      ALOGE("debuggerd: unexpected request action: %d", request.action);
    585      exit(1);
    586  }
    587
    588  // At this point, the thread that made the request is blocked in
    589  // a read() call.  If the thread has crashed, then this gives us
    590  // time to PTRACE_ATTACH to it before it has a chance to really fault.
    591  //
    592  // The PTRACE_ATTACH sends a SIGSTOP to the target process, but it
    593  // won't necessarily have stopped by the time ptrace() returns.  (We
    594  // currently assume it does.)  We write to the file descriptor to
    595  // ensure that it can run as soon as we call PTRACE_CONT below.
    596  // See details in bionic/libc/linker/debugger.c, in function
    597  // debugger_signal_handler().
    598
    599  // Attach to the target process.
            //通过ptrace监控子进程(要crash的应用进程),此时debuggerd变为其父进程,向应用进程发送sigstop;以后应用进程接受到的signal会先发到父进程
    600  if (!ptrace_attach_thread(request.pid, request.tid)) {
    601    ALOGE("debuggerd: ptrace attach failed: %s", strerror(errno));
    602    exit(1);
    603  }
    604
    605  // DEBUGGER_ACTION_CRASH requests can come from arbitrary processes and the tid field in the
    606  // request is sent from the other side. If an attacker can cause a process to be spawned with the
    607  // pid of their process, they could trick debuggerd into dumping that process by exiting after
    608  // sending the request. Validate the trusted request.uid/gid to defend against this.
    609  if (request.action == DEBUGGER_ACTION_CRASH) {
    610    pid_t pid;
    611    uid_t uid;
    612    gid_t gid;
    613    if (get_process_info(request.tid, &pid, &uid, &gid) != 0) {
    614      ALOGE("debuggerd: failed to get process info for tid '%d'", request.tid);
    615      exit(1);
    616    }
    617
    618    if (pid != request.pid || uid != request.uid || gid != request.gid) {
    619      ALOGE(
    620        "debuggerd: attached task %d does not match request: "
    621        "expected pid=%d,uid=%d,gid=%d, actual pid=%d,uid=%d,gid=%d",
    622        request.tid, request.pid, request.uid, request.gid, pid, uid, gid);
    623      exit(1);
    624    }
    625  }
    626
    627  // Don't attach to the sibling threads if we want to attach gdb.
    628  // Supposedly, it makes the process less reliable.
    629  bool attach_gdb = should_attach_gdb(request);
    630  if (attach_gdb) {
    631    // Open all of the input devices we need to listen for VOLUMEDOWN before dropping privileges.
    632    if (init_getevent() != 0) {
    633      ALOGE("debuggerd: failed to initialize input device, not waiting for gdb");
    634      attach_gdb = false;
    635    }
    636
    637  }
    638
    639  std::set<pid_t> siblings;
    640  if (!attach_gdb) {
    641    ptrace_siblings(request.pid, request.tid, siblings);
    642  }
    643
    644  // Generate the backtrace map before dropping privileges.
    645  std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(request.pid));
    646
    647  int amfd = -1;
    648  std::unique_ptr<std::string> amfd_data;
    649  if (request.action == DEBUGGER_ACTION_CRASH) {
    650    // Connect to the activity manager before dropping privileges.
    651    amfd = activity_manager_connect();
    652    amfd_data.reset(new std::string);
    653  }
    654
    655  // Collect the list of open files.
    656  OpenFilesList open_files;
    657  populate_open_files_list(request.pid, &open_files);
    658
    659  bool succeeded = false;
    660
    661  // Now that we've done everything that requires privileges, we can drop them.
    662  if (!drop_privileges()) {
    663    ALOGE("debuggerd: failed to drop privileges, exiting");
    664    _exit(1);
    665  }
    666
    667  int crash_signal = SIGKILL;
    668  succeeded = perform_dump(request, fd, tombstone_fd, backtrace_map.get(), siblings,
    669                           &crash_signal, &open_files, amfd_data.get());
    670  if (succeeded) {
    671    if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
    672      if (!tombstone_path.empty()) {
    673        android::base::WriteFully(fd, tombstone_path.c_str(), tombstone_path.length()); //将生成的tombstone文件路径通过socket写回给请求方
    674      }
    675    }
    676  }
    677
    678  if (attach_gdb || request.action == DEBUGGER_ACTION_CRASH) {
    679    // Before detach we must send SIGSTOP to the target.
    680    // Tell the signal process to send SIGSTOP to the target.
    681    if (!send_signal(request.pid, 0, SIGSTOP)) {
    682      ALOGE("debuggerd: failed to stop process for gdb attach: %s", strerror(errno));
    683      attach_gdb = false;
    684    }
    685  }
    686
    687  if (!attach_gdb) {
    688    // Tell the Activity Manager about the crashing process. If we are
    689    // waiting for gdb to attach, do not send this or Activity Manager
    690    // might kill the process before anyone can attach.
    691    activity_manager_write(request.pid, crash_signal, amfd, *amfd_data.get());
    692  }
    693
    694  if (ptrace(PTRACE_DETACH, request.tid, 0, 0) != 0) { //detach客户端
    695    ALOGE("debuggerd: ptrace detach from %d failed: %s", request.tid, strerror(errno));
    696  }
    697
    698  for (pid_t sibling : siblings) {
    699    ptrace(PTRACE_DETACH, sibling, 0, 0);
    700  }
    701
    702  // Send the signal back to the process if it crashed and we're not waiting for gdb.
    703  if (!attach_gdb && request.action == DEBUGGER_ACTION_CRASH) {
    704    if (!send_signal(request.pid, request.tid, crash_signal)) {
    705      ALOGE("debuggerd: failed to kill process %d: %s", request.pid, strerror(errno));
    706    }
    707  }
    708
    709  // Wait for gdb, if requested.
    710  if (attach_gdb) {
    711    wait_for_user_action(request);
    712
    713    // Now tell the activity manager about this process.
    714    activity_manager_write(request.pid, crash_signal, amfd, *amfd_data.get());
    715
    716    // Tell the signal process to send SIGCONT to the target.
    717    if (!send_signal(request.pid, 0, SIGCONT)) {
    718      ALOGE("debuggerd: failed to resume process %d: %s", request.pid, strerror(errno));
    719    }
    720
    721    uninit_getevent();
    722  }
    723
    724  close(amfd);
    725
    726  exit(!succeeded);
    727}
    

    perform_dump:进行dump的过程

    484static bool perform_dump(const debugger_request_t& request, int fd, int tombstone_fd,
    485                         BacktraceMap* backtrace_map, const std::set<pid_t>& siblings,
    486                         int* crash_signal, OpenFilesList* open_files, std::string* amfd_data) {
    487  if (TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) { //向应用进程(客户端返回一个值),表示连上了,可以开始dump了
    488    ALOGE("debuggerd: failed to respond to client: %s\n", strerror(errno));
    489    return false;
    490  }
    491
    492  int total_sleep_time_usec = 0;
    493  while (true) {
    494    int signal = wait_for_signal(request.tid, &total_sleep_time_usec); //因为此时已经被ptrace_attach了,所以第二次客户端发给自己的信号会在这里被接收
    495    switch (signal) {
    496      case -1:
    497        ALOGE("debuggerd: timed out waiting for signal");
    498        return false;
    499
    500      case SIGSTOP: //这里是attach时向客户端发送的sigstop信号
    501        if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
    502          ALOGV("debuggerd: stopped -- dumping to tombstone");
    503          engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,
    504                            request.original_si_code, request.abort_msg_address, open_files, amfd_data); 
    505        } else if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) {
    506          ALOGV("debuggerd: stopped -- dumping to fd");
    507          dump_backtrace(fd, backtrace_map, request.pid, request.tid, siblings, nullptr);
    508        } else {
    509          ALOGV("debuggerd: stopped -- continuing");
                  //此时通过debuggerd用PTRACE_CONT命令让应用继续执行,
                  // 这样应用的read系统调用就可以返回到用户态,继续执行debuggerd_signal_handler()
                   // 此时,debuggerd进入下一次循环,block在wait_for_signal,继续等待应用的下一个信号
    510          if (ptrace(PTRACE_CONT, request.tid, 0, 0) != 0) {
    511            ALOGE("debuggerd: ptrace continue failed: %s", strerror(errno));
    512            return false;
    513          }
    514          continue;  // loop again //注意,这里是继续循环,等待客户端的第二次信号
    515        }
    516        break;
    517
    518      case SIGABRT:
    519      case SIGBUS:
    520      case SIGFPE:
    521      case SIGILL:
    522      case SIGSEGV:
    523#ifdef SIGSTKFLT
    524      case SIGSTKFLT:
    525#endif
    526      case SIGSYS:
    527      case SIGTRAP:
    528        ALOGV("stopped -- fatal signal\n");
    529        *crash_signal = signal;
    530        engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,
    531                          request.original_si_code, request.abort_msg_address, open_files, amfd_data); //客户端发的第二次信号被debuggerd接受,开始dump
    532        break; //dump完之后跳出循环,执行下面的操作
    533
    534      default:
    535        ALOGE("debuggerd: process stopped due to unexpected signal %d\n", signal);
    536        break;
    537    }
    538    break;
    539  }
    540
    541  return true;
    542}
    

    本质上有两次通信;
    第一次通信是进程的signal handler通过socket与debuggerd服务端进行通信:客户端向debuggerd写入request,然后阻塞在read上等待回应;服务端读取request后,先通过ptrace attach到客户端(成为它的tracer,可以理解为临时的"父进程"),attach的同时客户端会收到一个SIGSTOP,然后服务端向socket写回一个字节表示已收到请求。debuggerd在wait_for_signal中收到这个SIGSTOP后,用PTRACE_CONT命令让应用继续执行,这样应用的read系统调用就可以返回到用户态,继续执行debuggerd_signal_handler;debuggerd则进入下一次循环,阻塞在wait_for_signal,继续等待应用的下一个信号

    客户端收到答复之后,将之前注册的信号处理函数恢复为默认处理(SIG_DFL)(这样再接收到信号就可以正常地走kernel的默认流程了),然后通过rt_tgsigqueueinfo给自己的崩溃线程再次发送同一个信号

    这里就是第二次通信,信号被父进程debuggerd拦截,开始dump操作,dump操作完后进行detach操作,不再作为客户端的父进程

    此时客户端会进入到默认的信号处理逻辑中

    2173int get_signal(struct ksignal *ksig)
    2174{
    2175    struct sighand_struct *sighand = current->sighand;
    2176    struct signal_struct *signal = current->signal;
    2177    int signr;
    2178
    2179    if (unlikely(current->task_works))
    2180        task_work_run();
    2181
    2182    if (unlikely(uprobe_deny_signal()))
    2183        return 0;
    2184
    2185    /*
    2186     * Do this once, we can't return to user-mode if freezing() == T.
    2187     * do_signal_stop() and ptrace_stop() do freezable_schedule() and
    2188     * thus do not need another check after return.
    2189     */
    2190    try_to_freeze();
    2191
    2192relock:
    2193    spin_lock_irq(&sighand->siglock);
    2194    /*
    2195     * Every stopped thread goes here after wakeup. Check to see if
    2196     * we should notify the parent, prepare_signal(SIGCONT) encodes
    2197     * the CLD_ si_code into SIGNAL_CLD_MASK bits.
    2198     */
    2199    if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
    2200        int why;
    2201
    2202        if (signal->flags & SIGNAL_CLD_CONTINUED)
    2203            why = CLD_CONTINUED;
    2204        else
    2205            why = CLD_STOPPED;
    2206
    2207        signal->flags &= ~SIGNAL_CLD_MASK;
    2208
    2209        spin_unlock_irq(&sighand->siglock);
    2210
    2211        /*
    2212         * Notify the parent that we're continuing.  This event is
    2213         * always per-process and doesn't make whole lot of sense
    2214         * for ptracers, who shouldn't consume the state via
    2215         * wait(2) either, but, for backward compatibility, notify
    2216         * the ptracer of the group leader too unless it's gonna be
    2217         * a duplicate.
    2218         */
    2219        read_lock(&tasklist_lock);
    2220        do_notify_parent_cldstop(current, false, why);
    2221
    2222        if (ptrace_reparented(current->group_leader))
    2223            do_notify_parent_cldstop(current->group_leader,
    2224                        true, why);
    2225        read_unlock(&tasklist_lock);
    2226
    2227        goto relock;
    2228    }
    2229
    2230    for (;;) {
    2231        struct k_sigaction *ka;
    2232
    2233        if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) &&
    2234            do_signal_stop(0))
    2235            goto relock;
    2236
    2237        if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
    2238            do_jobctl_trap();
    2239            spin_unlock_irq(&sighand->siglock);
    2240            goto relock;
    2241        }
    2242
    2243        signr = dequeue_signal(current, &current->blocked, &ksig->info);
    2244
    2245        if (!signr)
    2246            break; /* will return 0 */
    2247
    2248        if (unlikely(current->ptrace) && signr != SIGKILL) {
    2249            signr = ptrace_signal(signr, &ksig->info);
    2250            if (!signr)
    2251                continue;
    2252        }
    2253
    2254        ka = &sighand->action[signr-1];
    2255
    2256        /* Trace actually delivered signals. */
    2257        trace_signal_deliver(signr, &ksig->info, ka);
    2258
    2259        if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
    2260            continue;
    2261        if (ka->sa.sa_handler != SIG_DFL) {
    2262            /* Run the handler.  */
    2263            ksig->ka = *ka;
    2264
    2265            if (ka->sa.sa_flags & SA_ONESHOT)
    2266                ka->sa.sa_handler = SIG_DFL;
    2267
    2268            break; /* will return non-zero "signr" value */
    2269        }
    2270
    2271        /*
    2272         * Now we are doing the default action for this signal.
    2273         */
    2274        if (sig_kernel_ignore(signr)) /* Default is nothing. */
    2275            continue;
    2276
    2277        /*
    2278         * Global init gets no signals it doesn't want.
    2279         * Container-init gets no signals it doesn't want from same
    2280         * container.
    2281         *
    2282         * Note that if global/container-init sees a sig_kernel_only()
    2283         * signal here, the signal must have been generated internally
    2284         * or must have come from an ancestor namespace. In either
    2285         * case, the signal cannot be dropped.
    2286         */
    2287        if (unlikely(signal->flags & SIGNAL_UNKILLABLE) &&
    2288                !sig_kernel_only(signr))
    2289            continue;
    2290
    2291        if (sig_kernel_stop(signr)) {
    2292            /*
    2293             * The default action is to stop all threads in
    2294             * the thread group.  The job control signals
    2295             * do nothing in an orphaned pgrp, but SIGSTOP
    2296             * always works.  Note that siglock needs to be
    2297             * dropped during the call to is_orphaned_pgrp()
    2298             * because of lock ordering with tasklist_lock.
    2299             * This allows an intervening SIGCONT to be posted.
    2300             * We need to check for that and bail out if necessary.
    2301             */
    2302            if (signr != SIGSTOP) {
    2303                spin_unlock_irq(&sighand->siglock);
    2304
    2305                /* signals can be posted during this window */
    2306
    2307                if (is_current_pgrp_orphaned())
    2308                    goto relock;
    2309
    2310                spin_lock_irq(&sighand->siglock);
    2311            }
    2312
    2313            if (likely(do_signal_stop(ksig->info.si_signo))) {
    2314                /* It released the siglock.  */
    2315                goto relock;
    2316            }
    2317
    2318            /*
    2319             * We didn't actually stop, due to a race
    2320             * with SIGCONT or something like that.
    2321             */
    2322            continue;
    2323        }
    2324
    2325        spin_unlock_irq(&sighand->siglock);
    2326
    2327        /*
    2328         * Anything else is fatal, maybe with a core dump.
    2329         */
    2330        current->flags |= PF_SIGNALED;
    2331
    2332        if (sig_kernel_coredump(signr)) {
    2333            if (print_fatal_signals)
    2334                print_fatal_signal(ksig->info.si_signo);
    2335            proc_coredump_connector(current);
    2336            /*
    2337             * If it was able to dump core, this kills all
    2338             * other threads in the group and synchronizes with
    2339             * their demise.  If we lost the race with another
    2340             * thread getting here, it set group_exit_code
    2341             * first and our do_group_exit call below will use
    2342             * that value and ignore the one we pass it.
    2343             */
    2344            do_coredump(&ksig->info);
    2345        }
    2346
    2347        /*
    2348         * Death signals, no core dump.
    2349         */
    2350        do_group_exit(ksig->info.si_signo);
    2351        /* NOTREACHED */
    2352    }
    2353    spin_unlock_irq(&sighand->siglock);
    2354
    2355    ksig->sig = signr;
    2356    return ksig->sig > 0;
    2357}
    
    412#define sig_kernel_coredump(sig) \
    413 (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_COREDUMP_MASK))
    
    398#define SIG_KERNEL_COREDUMP_MASK (\
    399        rt_sigmask(SIGQUIT)   |  rt_sigmask(SIGILL)    | \
    400        rt_sigmask(SIGTRAP)   |  rt_sigmask(SIGABRT)   | \
    401        rt_sigmask(SIGFPE)    |  rt_sigmask(SIGSEGV)   | \
    402        rt_sigmask(SIGBUS)    |  rt_sigmask(SIGSYS)    | \
    403        rt_sigmask(SIGXCPU)   |  rt_sigmask(SIGXFSZ)   | \
    404        SIGEMT_MASK                                    )
    

    可见coredump响应的信号比tombstone多,tombstone响应的信号是coredump所响应信号的子集。能够触发coredump的信号如下,参考default action列表:

     *      +--------------------+------------------+
     *      |  POSIX signal      |  default action  |
     *      +--------------------+------------------+
     *      |  SIGHUP            |  terminate       |
     *      |  SIGINT            |  terminate       |
     *      |  SIGQUIT           |  coredump        |
     *      |  SIGILL            |  coredump        |
     *      |  SIGTRAP           |  coredump        |
     *      |  SIGABRT/SIGIOT    |  coredump        |
     *      |  SIGBUS            |  coredump        |
     *      |  SIGFPE            |  coredump        |
     *      |  SIGKILL           |  terminate(+)    |
     *      |  SIGUSR1           |  terminate       |
     *      |  SIGSEGV           |  coredump        |
     *      |  SIGUSR2           |  terminate       |
     *      |  SIGPIPE           |  terminate       |
     *      |  SIGALRM           |  terminate       |
     *      |  SIGTERM           |  terminate       |
     *      |  SIGCHLD           |  ignore          |
     *      |  SIGCONT           |  ignore(*)       |
     *      |  SIGSTOP           |  stop(*)(+)      |
     *      |  SIGTSTP           |  stop(*)         |
     *      |  SIGTTIN           |  stop(*)         |
     *      |  SIGTTOU           |  stop(*)         |
     *      |  SIGURG            |  ignore          |
     *      |  SIGXCPU           |  coredump        |
     *      |  SIGXFSZ           |  coredump        |
     *      |  SIGVTALRM         |  terminate       |
     *      |  SIGPROF           |  terminate       |
     *      |  SIGPOLL/SIGIO     |  terminate       |
     *      |  SIGSYS/SIGUNUSED  |  coredump        |
     *      |  SIGSTKFLT         |  terminate       |
     *      |  SIGWINCH          |  ignore          |
     *      |  SIGPWR            |  terminate       |
     *      |  SIGRTMIN-SIGRTMAX |  terminate       |
     *      +--------------------+------------------+
     *      |  non-POSIX signal  |  default action  |
     *      +--------------------+------------------+
     *      |  SIGEMT            |  coredump        |
     *      +--------------------+------------------+
    

    那么如何为tombstone添加一个信号呢?

    拓展

    debuggerd_init打不出log?

    原因:
    bionic/linker/linker_main.cpp

    /*
    211 * This code is called after the linker has linked itself and
    212 * fixed it's own GOT. It is safe to make references to externs
    213 * and other non-local data at this point.
    214 */
    215static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args) {
    216  ProtectedDataGuard guard;
    217
    218#if TIMING
    219  struct timeval t0, t1;
    220  gettimeofday(&t0, 0);
    221#endif
    222
    223  // Sanitize the environment.
    224  __libc_init_AT_SECURE(args);
    225
    226  // Initialize system properties
    227  __system_properties_init(); // may use 'environ'
    228
    229  // Register the debuggerd signal handler.
    230#ifdef __ANDROID__
    231  debuggerd_callbacks_t callbacks = {
    232    .get_abort_message = []() {
    233      return g_abort_message;
    234    },
    235    .post_dump = &notify_gdb_of_libraries,
    236  };
    237  debuggerd_init(&callbacks); //此时LD_DEBUG还没有初始化
    238#endif
    239
    240  g_linker_logger.ResetState();
    241
    242  // Get a few environment variables.
    243  const char* LD_DEBUG = getenv("LD_DEBUG");
    244  if (LD_DEBUG != nullptr) {
    245    g_ld_debug_verbosity = atoi(LD_DEBUG);
    246  }
    

    bionic/linker/linker_debug.h

    63#if LINKER_DEBUG_TO_LOG
    64#define _PRINTVF(v, x...) \
    65    do { \
    66      if (g_ld_debug_verbosity > (v)) async_safe_format_log(5-(v), "linker", x); \
    67    } while (0)
    68#else /* !LINKER_DEBUG_TO_LOG */
    69#define _PRINTVF(v, x...) \
    70    do { \
    71      if (g_ld_debug_verbosity > (v)) { async_safe_format_fd(1, x); write(1, "\n", 1); } \
    72    } while (0)
    73#endif /* !LINKER_DEBUG_TO_LOG */
    74
    75#define PRINT(x...)          _PRINTVF(-1, x)
    76#define INFO(x...)           _PRINTVF(0, x)
    77#define TRACE(x...)          _PRINTVF(1, x)
    

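    所以在debuggerd_init中使用INFO等宏打印时,g_ld_debug_verbosity还没有根据LD_DEBUG环境变量赋值,不满足宏中g_ld_debug_verbosity > (v)的判断条件,日志级别不够,因此打不出来;可以直接调用async_safe_format_log进行打印,它不经过该级别判断,就一定能打出来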

    相关文章

      网友评论

        本文标题:tombstone与debuggerd相关流程

        本文链接:https://www.haomeiwen.com/subject/hupnfqtx.html