美文网首页PostgreSQL
PostgreSQL 源码解读(124)- 后台进程#4(aut

PostgreSQL 源码解读(124)- 后台进程#4(aut

作者: EthanHe | 来源:发表于2020-07-25 18:23 被阅读0次

    本节简单介绍了PostgreSQL的后台进程:autovacuum,主要分析了AutoVacLauncherMain函数的实现逻辑。

    一、数据结构

    宏定义

    /*
     * Accessors for the process-wide processing mode, held in the variable Mode.
     *
     * GetProcessingMode() expands to the current value of Mode.
     *
     * SetProcessingMode(mode) first asserts (via AssertArg) that the argument is
     * one of BootstrapProcessing, InitProcessing or NormalProcessing, then
     * stores it in Mode.  The body is wrapped in do { ... } while(0) so the
     * macro behaves as a single statement.  Note that the argument is expanded
     * more than once, so it should not have side effects.
     */
    #define GetProcessingMode() Mode
    
    #define SetProcessingMode(mode) \
        do { \
            AssertArg((mode) == BootstrapProcessing || \
                      (mode) == InitProcessing || \
                      (mode) == NormalProcessing); \
            Mode = (mode); \
        } while(0)
    

    二、源码解读

    AutoVacLauncherMain函数是autovacuum launcher进程的主循环。

    /*
     * Main loop for the autovacuum launcher process.
     *
     * In order, this function:
     *   1. flags the process as the launcher, sets its ps display and installs
     *      the signal handlers;
     *   2. runs BaseInit(), InitProcess() (non-EXEC_BACKEND builds only) and
     *      InitPostgres(), then switches to NormalProcessing mode;
     *   3. creates AutovacMemCxt and makes it the current memory context;
     *   4. establishes a sigsetjmp() recovery point that cleans up after an
     *      ereport(ERROR) and then falls through to the code below again;
     *   5. forces a handful of configuration options to fixed values;
     *   6. if AutoVacuumingActive() is false, starts at most one worker and
     *      exits;
     *   7. otherwise builds the database list and loops -- sleep on the latch,
     *      react to SIGHUP/SIGUSR2 flags, and launch a worker when allowed --
     *      until got_SIGTERM is set.
     *
     * argc and argv are not referenced in the body.  The function's own exit
     * points are both proc_exit(0) calls.
     */
    NON_EXEC_STATIC void
    AutoVacLauncherMain(int argc, char *argv[])
    {
        sigjmp_buf  local_sigjmp_buf;
    
        am_autovacuum_launcher = true;
    
        /* Identify myself via ps */
        init_ps_display(pgstat_get_backend_desc(B_AUTOVAC_LAUNCHER), "", "", "");
    
        ereport(DEBUG1,
                (errmsg("autovacuum launcher started")));
    
        /*
         * Optional startup delay.  PostAuthDelay is scaled by 1000000 before
         * being handed to pg_usleep (presumably seconds -> microseconds).
         */
        if (PostAuthDelay)
            pg_usleep(PostAuthDelay * 1000000L);
        /* Enter initialization mode until InitPostgres() has completed */
        SetProcessingMode(InitProcessing);
    
        /*
         * Set up signal handlers.  We operate on databases much like a regular
         * backend, so we use the same signal handling.  See equivalent code in
         * tcop/postgres.c.
         */
        pqsignal(SIGHUP, av_sighup_handler);
        pqsignal(SIGINT, StatementCancelHandler);
        pqsignal(SIGTERM, avl_sigterm_handler);
    
        pqsignal(SIGQUIT, quickdie);
        InitializeTimeouts();       /* establishes SIGALRM handler */
    
        pqsignal(SIGPIPE, SIG_IGN);/* SIGPIPE is ignored */
        pqsignal(SIGUSR1, procsignal_sigusr1_handler);
        pqsignal(SIGUSR2, avl_sigusr2_handler);
        pqsignal(SIGFPE, FloatExceptionHandler);
        pqsignal(SIGCHLD, SIG_DFL);
    
        /* Early initialization */
        BaseInit();
    
        /*
         * Create a per-backend PGPROC struct in shared memory, except in the
         * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
         * this before we can use LWLocks (and in the EXEC_BACKEND case we already
         * had to do some stuff with LWLocks).
         */
    #ifndef EXEC_BACKEND
        InitProcess();
    #endif
        /* No database name/OID and no user name/OID are supplied here */
        InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL, false);
        /* Initialization is complete; switch to normal processing mode */
        SetProcessingMode(NormalProcessing);
    
        /*
         * Create a memory context that we will do all our work in.  We do this so
         * that we can reset the context during error recovery and thereby avoid
         * possible memory leaks.
         */
        AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
                                              "Autovacuum Launcher",
                                              ALLOCSET_DEFAULT_SIZES);
        MemoryContextSwitchTo(AutovacMemCxt);
    
        /*
         * If an exception is encountered, processing resumes here.
         *
         * This code is a stripped down version of PostgresMain error recovery.
         */
        if (sigsetjmp(local_sigjmp_buf, 1) != 0)
        {
            /* since not using PG_TRY, must reset error stack by hand */
            error_context_stack = NULL;
    
            /* Prevents interrupts while cleaning up */
            HOLD_INTERRUPTS();
    
            /* Forget any pending QueryCancel or timeout request */
            disable_all_timeouts(false);
            QueryCancelPending = false; /* second to avoid race condition */
    
            /* Report the error to the server log */
            EmitErrorReport();
    
            /* Abort the current transaction in order to recover */
            AbortCurrentTransaction();
    
            /*
             * Release any other resources, for the case where we were not in a
             * transaction.
             */
            LWLockReleaseAll();
            pgstat_report_wait_end();
            AbortBufferIO();
            UnlockBuffers();
            /* this is probably dead code, but let's be safe: */
            if (AuxProcessResourceOwner)
                ReleaseAuxProcessResources(false);
            AtEOXact_Buffers(false);
            AtEOXact_SMgr();
            AtEOXact_Files(false);
            AtEOXact_HashTables(false);
    
            /*
             * Now return to normal top-level context and clear ErrorContext for
             * next time.
             */
            MemoryContextSwitchTo(AutovacMemCxt);
            FlushErrorState();
    
            /* Flush any leaked data in the top-level context */
            MemoryContextResetAndDeleteChildren(AutovacMemCxt);
    
            /*
             * don't leave dangling pointers to freed memory: the context was
             * just reset, so forget the database list context and reinitialize
             * the list head to empty
             */
            DatabaseListCxt = NULL;
            dlist_init(&DatabaseList);
    
            /*
             * Make sure pgstat also considers our stat data as gone.  Note: we
             * mustn't use autovac_refresh_stats here.
             */
            pgstat_clear_snapshot();
    
            /* Now we can allow interrupts again */
            RESUME_INTERRUPTS();
    
            /* if in shutdown mode, no need for anything further; just go away */
            if (got_SIGTERM)
                goto shutdown;
    
            /*
             * Sleep at least 1 second after any error.  We don't want to be
             * filling the error logs as fast as we can.
             */
            pg_usleep(1000000L);
        }
    
        /* We can now handle ereport(ERROR) */
        PG_exception_stack = &local_sigjmp_buf;
    
        /* must unblock signals before calling rebuild_database_list */
        PG_SETMASK(&UnBlockSig);
    
        /*
         * Set always-secure search path.  Launcher doesn't connect to a database,
         * so this has no effect.
         */
        SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
    
        /*
         * Force zero_damaged_pages OFF in the autovac process, even if it is set
         * in postgresql.conf.  We don't really want such a dangerous option being
         * applied non-interactively.
         */
        SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
    
        /*
         * Force settable timeouts off to avoid letting these settings prevent
         * regular maintenance from being executed.
         */
        SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
        SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
        SetConfigOption("idle_in_transaction_session_timeout", "0",
                        PGC_SUSET, PGC_S_OVERRIDE);
    
        /*
         * Force default_transaction_isolation to READ COMMITTED.  We don't want
         * to pay the overhead of serializable mode, nor add any risk of causing
         * deadlocks or delaying other transactions.
         */
        SetConfigOption("default_transaction_isolation", "read committed",
                        PGC_SUSET, PGC_S_OVERRIDE);
    
        /*
         * In emergency mode, just start a worker (unless shutdown was requested)
         * and go away.  "Emergency mode" here means we were started although
         * AutoVacuumingActive() reports false.
         */
        if (!AutoVacuumingActive())
        {
            if (!got_SIGTERM)
                do_start_worker();
            proc_exit(0);           /* done */
        }
    
        /* Publish our PID in shared memory; it is cleared again at shutdown */
        AutoVacuumShmem->av_launcherpid = MyProcPid;
    
        /*
         * Create the initial database list.  The invariant we want this list to
         * keep is that it's ordered by decreasing next_time.  As soon as an entry
         * is updated to a higher time, it will be moved to the front (which is
         * correct because the only operation is to add autovacuum_naptime to the
         * entry, and time always increases).
         */
        rebuild_database_list(InvalidOid);
    
        /* loop until shutdown request */
        while (!got_SIGTERM)
        {
            struct timeval nap;
            TimestampTz current_time = 0;
            bool        can_launch;
    
            /*
             * This loop is a bit different from the normal use of WaitLatch,
             * because we'd like to sleep before the first launch of a child
             * process.  So it's WaitLatch, then ResetLatch, then check for
             * wakening conditions.
             */
    
            /* Compute the nap length into 'nap'; first argument is "a free worker slot exists" */
            launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
                                     false, &nap);
    
            /*
             * Wait until naptime expires or we get some type of signal (all the
             * signal handlers will wake us by calling SetLatch).  The timeout
             * argument is the nap converted from seconds + microseconds to
             * milliseconds.
             */
            (void) WaitLatch(MyLatch,
                             WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                             (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
                             WAIT_EVENT_AUTOVACUUM_MAIN);
    
            ResetLatch(MyLatch);
    
            /* Process sinval catchup interrupts that happened while sleeping */
            ProcessCatchupInterrupt();
    
            /* the normal shutdown case */
            if (got_SIGTERM)
                break;
    
            if (got_SIGHUP)
            {
                /* clear the flag first, then reread the configuration file */
                got_SIGHUP = false;
                ProcessConfigFile(PGC_SIGHUP);
    
                /* shutdown requested in config file? */
                if (!AutoVacuumingActive())
                    break;
    
                /* rebalance in case the default cost parameters changed */
                LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
                autovac_balance_cost();
                LWLockRelease(AutovacuumLock);
    
                /* rebuild the list in case the naptime changed */
                rebuild_database_list(InvalidOid);
            }
    
            /*
             * a worker finished, or postmaster signalled failure to start a
             * worker
             */
            if (got_SIGUSR2)
            {
                got_SIGUSR2 = false;
    
                /* rebalance cost limits, if needed */
                if (AutoVacuumShmem->av_signal[AutoVacRebalance])
                {
                    LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
                    AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
                    autovac_balance_cost();
                    LWLockRelease(AutovacuumLock);
                }
    
                if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
                {
                    /*
                     * If the postmaster failed to start a new worker, we sleep
                     * for a little while and resend the signal.  The new worker's
                     * state is still in memory, so this is sufficient.  After
                     * that, we restart the main loop.
                     *
                     * XXX should we put a limit to the number of times we retry?
                     * I don't think it makes much sense, because a future start
                     * of a worker will continue to fail in the same way.
                     */
                    AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
                    pg_usleep(1000000L);    /* 1s */
                    SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
                    continue;
                }
            }
    
            /*
             * There are some conditions that we need to check before trying to
             * start a worker.  First, we need to make sure that there is a worker
             * slot available.  Second, we need to make sure that no other worker
             * failed while starting up.
             */
    
            current_time = GetCurrentTimestamp();
            LWLockAcquire(AutovacuumLock, LW_SHARED);
    
            can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
    
            if (AutoVacuumShmem->av_startingWorker != NULL)
            {
                int         waittime;
                WorkerInfo  worker = AutoVacuumShmem->av_startingWorker;
    
                /*
                 * We can't launch another worker when another one is still
                 * starting up (or failed while doing so), so just sleep for a bit
                 * more; that worker will wake us up again as soon as it's ready.
                 * We will only wait autovacuum_naptime seconds (up to a maximum
                 * of 60 seconds) for this to happen however.  Note that failure
                 * to connect to a particular database is not a problem here,
                 * because the worker removes itself from the startingWorker
                 * pointer before trying to connect.  Problems detected by the
                 * postmaster (like fork() failure) are also reported and handled
                 * differently.  The only problems that may cause this code to
                 * fire are errors in the earlier sections of AutoVacWorkerMain,
                 * before the worker removes the WorkerInfo from the
                 * startingWorker pointer.
                 */
                /* seconds -> milliseconds, the unit TimestampDifferenceExceeds takes */
                waittime = Min(autovacuum_naptime, 60) * 1000;
                if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
                                               waittime))
                {
                    /* trade the shared lock for an exclusive one before modifying shared state */
                    LWLockRelease(AutovacuumLock);
                    LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
    
                    /*
                     * No other process can put a worker in starting mode, so if
                     * startingWorker is still set (non-NULL) after exchanging our
                     * lock, we assume it's the same one we saw above (so we don't
                     * recheck the launch time).
                     */
                    if (AutoVacuumShmem->av_startingWorker != NULL)
                    {
                        /* reset the stuck worker's slot and return it to the free list */
                        worker = AutoVacuumShmem->av_startingWorker;
                        worker->wi_dboid = InvalidOid;
                        worker->wi_tableoid = InvalidOid;
                        worker->wi_sharedrel = false;
                        worker->wi_proc = NULL;
                        worker->wi_launchtime = 0;
                        dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
                                        &worker->wi_links);
                        AutoVacuumShmem->av_startingWorker = NULL;
                        elog(WARNING, "worker took too long to start; canceled");
                    }
                }
                else
                    can_launch = false;
            }
            LWLockRelease(AutovacuumLock);  /* either shared or exclusive */
    
            /* if we can't do anything, just go back to sleep */
            if (!can_launch)
                continue;
    
            /* We're OK to start a new worker */
            if (dlist_is_empty(&DatabaseList))
            {
                /*
                 * Special case when the list is empty: start a worker right away.
                 * This covers the initial case, when no database is in pgstats
                 * (thus the list is empty).  Note that the constraints in
                 * launcher_determine_sleep keep us from starting workers too
                 * quickly (at most once every autovacuum_naptime when the list is
                 * empty).
                 */
                launch_worker(current_time);
            }
            else
            {
                /*
                 * because rebuild_database_list constructs a list with most
                 * distant adl_next_worker first, we obtain our database from the
                 * tail of the list.
                 */
                avl_dbase  *avdb;
    
                avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
    
                /*
                 * launch a worker if next_worker is right now or it is in the
                 * past (a threshold of 0 ms means "current_time >= next_worker")
                 */
                if (TimestampDifferenceExceeds(avdb->adl_next_worker,
                                               current_time, 0))
                    launch_worker(current_time);
            }
        }
    
        /*
         * Normal exit from the autovac launcher is here.  Also reached by the
         * goto in the error-recovery block when got_SIGTERM is set.
         */
    shutdown:
        ereport(DEBUG1,
                (errmsg("autovacuum launcher shutting down")));
        AutoVacuumShmem->av_launcherpid = 0;
    
        proc_exit(0);               /* done */
    }
    
    
    /*
     * TimestampDifferenceExceeds -- has at least 'msec' milliseconds elapsed
     *      between start_time and stop_time?
     *
     * Returns true when (stop_time - start_time) is greater than or equal to
     * the threshold, false otherwise.  The millisecond threshold is scaled by
     * 1000 so that it is compared in the same unit as the timestamps.
     *
     * Both inputs must be ordinary finite timestamps (in current usage,
     * they'll be results from GetCurrentTimestamp()).
     */
    bool
    TimestampDifferenceExceeds(TimestampTz start_time,
                               TimestampTz stop_time,
                               int msec)
    {
        TimestampTz threshold = msec * INT64CONST(1000);
        TimestampTz elapsed = stop_time - start_time;
    
        if (elapsed < threshold)
            return false;
    
        return true;
    }
    
    
    /*
     * Return the address of the last element in the list.
     *
     * type       -- the struct type of the list elements
     * membername -- name of the dlist_node member embedded in 'type'
     * lhead      -- pointer to the list head, passed on to
     *               dlist_tail_element_off
     *
     * The first operand of the comma expression is a check (via
     * AssertVariableIsOfTypeMacro) that 'membername' really is a dlist_node;
     * the second computes the element address from the member's offset and
     * casts it to 'type *'.
     *
     * The list must not be empty.
     */
    #define dlist_tail_element(type, membername, lhead)                         \
        (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node),  \
         ((type *) dlist_tail_element_off(lhead, offsetof(type, membername))))
    
    /*
     * Internal helper for dlist_tail_element: given the list head and the byte
     * offset of the embedded list node within its containing struct, return
     * the address of the struct that contains the tail node.
     *
     * The tail node is reached through head->head.prev.  The list must not be
     * empty (checked with Assert).
     */
    static inline void *
    dlist_tail_element_off(dlist_head *head, size_t off)
    {
        char       *tail_node;
    
        Assert(!dlist_is_empty(head));
    
        tail_node = (char *) head->head.prev;
        return tail_node - off;
    }
    
    

    三、跟踪分析

    N/A

    四、参考资料

    PG Source Code

    相关文章

      网友评论

        本文标题:PostgreSQL 源码解读(124)- 后台进程#4(aut

        本文链接:https://www.haomeiwen.com/subject/luymlktx.html