本篇内容介绍了“PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
宏定义
/*
 * Processing-mode accessors.
 *
 * GetProcessingMode() expands to the variable Mode.
 *
 * SetProcessingMode(mode) first checks, via AssertArg, that the argument is
 * one of BootstrapProcessing, InitProcessing or NormalProcessing, and then
 * stores it in Mode.  The body is wrapped in do { ... } while(0) so that the
 * macro can be used wherever a single statement is expected.  Note that the
 * argument is evaluated more than once.
 */
#define GetProcessingMode() Mode
#define SetProcessingMode(mode) \
	do { \
		AssertArg((mode) == BootstrapProcessing || \
				  (mode) == InitProcessing || \
				  (mode) == NormalProcessing); \
		Mode = (mode); \
	} while(0)
AutoVacLauncherMain函数是autovacuum launcher进程的主循环。
/*
 * Main loop for the autovacuum launcher process.
 *
 * Overview of what the body does, in order:
 *	 1. marks the process as the launcher, sets its ps display and installs
 *		signal handlers;
 *	 2. runs BaseInit / InitProcess (non-EXEC_BACKEND only) / InitPostgres
 *		and switches from InitProcessing to NormalProcessing mode;
 *	 3. creates and switches to the "Autovacuum Launcher" memory context;
 *	 4. establishes a sigsetjmp() recovery point that cleans up after an
 *		error and then falls through into normal processing again;
 *	 5. overrides a handful of configuration options;
 *	 6. if autovacuum is not active ("emergency mode"), starts one worker
 *		and exits;
 *	 7. otherwise loops until got_SIGTERM is set: sleep on the latch, react
 *		to SIGHUP / SIGUSR2 flags, and launch workers when allowed.
 *
 * argc/argv are not referenced anywhere in this function body.
 *
 * Every path out of the function ends in proc_exit(0).
 */
NON_EXEC_STATIC void
AutoVacLauncherMain(int argc, char *argv[])
{
	sigjmp_buf	local_sigjmp_buf;

	am_autovacuum_launcher = true;

	/* Identify myself via ps */
	init_ps_display(pgstat_get_backend_desc(B_AUTOVAC_LAUNCHER), "", "", "");

	ereport(DEBUG1,
			(errmsg("autovacuum launcher started")));

	/* PostAuthDelay is multiplied by 1000000 to form the pg_usleep argument */
	if (PostAuthDelay)
		pg_usleep(PostAuthDelay * 1000000L);

	/* Set the processing mode to InitProcessing while we initialize */
	SetProcessingMode(InitProcessing);

	/*
	 * Set up signal handlers.  We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
	 */
	pqsignal(SIGHUP, av_sighup_handler);
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, avl_sigterm_handler);

	pqsignal(SIGQUIT, quickdie);
	InitializeTimeouts();		/* establishes SIGALRM handler */

	pqsignal(SIGPIPE, SIG_IGN); /* ignore SIGPIPE */
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, avl_sigusr2_handler);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGCHLD, SIG_DFL);

	/* Early initialization */
	BaseInit();

	/*
	 * Create a per-backend PGPROC struct in shared memory, except in the
	 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
	 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
	 * had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitProcess();
#endif

	/*
	 * Initialize via InitPostgres; every name/OID argument is passed as
	 * NULL/InvalidOid here.
	 */
	InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL, false);

	/* Initialization is done: switch to NormalProcessing mode */
	SetProcessingMode(NormalProcessing);

	/*
	 * Create a memory context that we will do all our work in.  We do this so
	 * that we can reset the context during error recovery and thereby avoid
	 * possible memory leaks.
	 */
	AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
										  "Autovacuum Launcher",
										  ALLOCSET_DEFAULT_SIZES);
	MemoryContextSwitchTo(AutovacMemCxt);

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * This code is a stripped down version of PostgresMain error recovery.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* since not using PG_TRY, must reset error stack by hand */
		error_context_stack = NULL;

		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Forget any pending QueryCancel or timeout request */
		disable_all_timeouts(false);
		QueryCancelPending = false; /* second to avoid race condition */

		/* Report the error to the server log */
		EmitErrorReport();

		/* Abort the current transaction in order to recover */
		AbortCurrentTransaction();

		/*
		 * Release any other resources, for the case where we were not in a
		 * transaction.
		 */
		LWLockReleaseAll();
		pgstat_report_wait_end();
		AbortBufferIO();
		UnlockBuffers();
		/* this is probably dead code, but let's be safe: */
		if (AuxProcessResourceOwner)
			ReleaseAuxProcessResources(false);
		AtEOXact_Buffers(false);
		AtEOXact_SMgr();
		AtEOXact_Files(false);
		AtEOXact_HashTables(false);

		/*
		 * Now return to normal top-level context and clear ErrorContext for
		 * next time.
		 */
		MemoryContextSwitchTo(AutovacMemCxt);
		FlushErrorState();

		/* Flush any leaked data in the top-level context */
		MemoryContextResetAndDeleteChildren(AutovacMemCxt);

		/*
		 * don't leave dangling pointers to freed memory: the database list
		 * is reset to empty along with its context pointer
		 */
		DatabaseListCxt = NULL;
		dlist_init(&DatabaseList);

		/*
		 * Make sure pgstat also considers our stat data as gone.  Note: we
		 * mustn't use autovac_refresh_stats here.
		 */
		pgstat_clear_snapshot();

		/* Now we can allow interrupts again */
		RESUME_INTERRUPTS();

		/* if in shutdown mode, no need for anything further; just go away */
		if (got_SIGTERM)
			goto shutdown;

		/*
		 * Sleep at least 1 second after any error.  We don't want to be
		 * filling the error logs as fast as we can.
		 */
		pg_usleep(1000000L);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	/* must unblock signals before calling rebuild_database_list */
	PG_SETMASK(&UnBlockSig);

	/*
	 * Set always-secure search path.  Launcher doesn't connect to a database,
	 * so this has no effect.
	 */
	SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);

	/*
	 * Force zero_damaged_pages OFF in the autovac process, even if it is set
	 * in postgresql.conf.  We don't really want such a dangerous option being
	 * applied non-interactively.
	 */
	SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);

	/*
	 * Force settable timeouts off to avoid letting these settings prevent
	 * regular maintenance from being executed.
	 */
	SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
	SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
	SetConfigOption("idle_in_transaction_session_timeout", "0",
					PGC_SUSET, PGC_S_OVERRIDE);

	/*
	 * Force default_transaction_isolation to READ COMMITTED.  We don't want
	 * to pay the overhead of serializable mode, nor add any risk of causing
	 * deadlocks or delaying other transactions.
	 */
	SetConfigOption("default_transaction_isolation", "read committed",
					PGC_SUSET, PGC_S_OVERRIDE);

	/*
	 * In emergency mode, just start a worker (unless shutdown was requested)
	 * and go away.
	 */
	if (!AutoVacuumingActive())
	{
		if (!got_SIGTERM)
			do_start_worker();
		proc_exit(0);			/* done */
	}

	/* Publish our PID in shared memory; it is cleared again at shutdown */
	AutoVacuumShmem->av_launcherpid = MyProcPid;

	/*
	 * Create the initial database list.  The invariant we want this list to
	 * keep is that it's ordered by decreasing next_time.  As soon as an entry
	 * is updated to a higher time, it will be moved to the front (which is
	 * correct because the only operation is to add autovacuum_naptime to the
	 * entry, and time always increases).
	 */
	rebuild_database_list(InvalidOid);

	/* loop until shutdown request */
	while (!got_SIGTERM)
	{
		struct timeval nap;
		TimestampTz current_time = 0;
		bool		can_launch;

		/*
		 * This loop is a bit different from the normal use of WaitLatch,
		 * because we'd like to sleep before the first launch of a child
		 * process.  So it's WaitLatch, then ResetLatch, then check for
		 * wakening conditions.
		 */

		launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
								 false, &nap);

		/*
		 * Wait until naptime expires or we get some type of signal (all the
		 * signal handlers will wake us by calling SetLatch).
		 *
		 * The timeout argument combines nap's seconds (times 1000) and
		 * microseconds (divided by 1000) into a single value.
		 */
		(void) WaitLatch(MyLatch,
						 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
						 (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
						 WAIT_EVENT_AUTOVACUUM_MAIN);

		ResetLatch(MyLatch);

		/* Process sinval catchup interrupts that happened while sleeping */
		ProcessCatchupInterrupt();

		/* the normal shutdown case */
		if (got_SIGTERM)
			break;

		/* SIGHUP was flagged: reload configuration and react to changes */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);

			/* shutdown requested in config file? */
			if (!AutoVacuumingActive())
				break;

			/* rebalance in case the default cost parameters changed */
			LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
			autovac_balance_cost();
			LWLockRelease(AutovacuumLock);

			/* rebuild the list in case the naptime changed */
			rebuild_database_list(InvalidOid);
		}

		/*
		 * a worker finished, or postmaster signalled failure to start a
		 * worker
		 */
		if (got_SIGUSR2)
		{
			got_SIGUSR2 = false;

			/* rebalance cost limits, if needed */
			if (AutoVacuumShmem->av_signal[AutoVacRebalance])
			{
				LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
				AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
				autovac_balance_cost();
				LWLockRelease(AutovacuumLock);
			}

			if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
			{
				/*
				 * If the postmaster failed to start a new worker, we sleep
				 * for a little while and resend the signal.  The new worker's
				 * state is still in memory, so this is sufficient.  After
				 * that, we restart the main loop.
				 *
				 * XXX should we put a limit to the number of times we retry?
				 * I don't think it makes much sense, because a future start
				 * of a worker will continue to fail in the same way.
				 */
				AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
				pg_usleep(1000000L);	/* 1s */
				SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
				continue;
			}
		}

		/*
		 * There are some conditions that we need to check before trying to
		 * start a worker.  First, we need to make sure that there is a worker
		 * slot available.  Second, we need to make sure that no other worker
		 * failed while starting up.
		 */

		current_time = GetCurrentTimestamp();
		LWLockAcquire(AutovacuumLock, LW_SHARED);

		can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);

		if (AutoVacuumShmem->av_startingWorker != NULL)
		{
			int			waittime;
			WorkerInfo	worker = AutoVacuumShmem->av_startingWorker;

			/*
			 * We can't launch another worker when another one is still
			 * starting up (or failed while doing so), so just sleep for a bit
			 * more; that worker will wake us up again as soon as it's ready.
			 * We will only wait autovacuum_naptime seconds (up to a maximum
			 * of 60 seconds) for this to happen however.  Note that failure
			 * to connect to a particular database is not a problem here,
			 * because the worker removes itself from the startingWorker
			 * pointer before trying to connect.  Problems detected by the
			 * postmaster (like fork() failure) are also reported and handled
			 * differently.  The only problems that may cause this code to
			 * fire are errors in the earlier sections of AutoVacWorkerMain,
			 * before the worker removes the WorkerInfo from the
			 * startingWorker pointer.
			 */
			/* seconds, capped at 60, scaled by 1000 for the comparison below */
			waittime = Min(autovacuum_naptime, 60) * 1000;
			if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
										   waittime))
			{
				/* swap the shared lock for an exclusive one before modifying */
				LWLockRelease(AutovacuumLock);
				LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);

				/*
				 * No other process can put a worker in starting mode, so if
				 * startingWorker is still set (non-NULL) after exchanging our
				 * lock, we assume it's the same one we saw above (so we don't
				 * recheck the launch time).
				 */
				if (AutoVacuumShmem->av_startingWorker != NULL)
				{
					/*
					 * Clear the stuck worker's slot, put it back on the free
					 * list, and forget it as the starting worker.
					 */
					worker = AutoVacuumShmem->av_startingWorker;
					worker->wi_dboid = InvalidOid;
					worker->wi_tableoid = InvalidOid;
					worker->wi_sharedrel = false;
					worker->wi_proc = NULL;
					worker->wi_launchtime = 0;
					dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
									&worker->wi_links);
					AutoVacuumShmem->av_startingWorker = NULL;
					elog(WARNING, "worker took too long to start; canceled");
				}
			}
			else
				can_launch = false;
		}
		/* release the lock */
		LWLockRelease(AutovacuumLock);	/* either shared or exclusive */

		/* if we can't do anything, just go back to sleep */
		if (!can_launch)
			continue;

		/* We're OK to start a new worker */

		if (dlist_is_empty(&DatabaseList))
		{
			/*
			 * Special case when the list is empty: start a worker right away.
			 * This covers the initial case, when no database is in pgstats
			 * (thus the list is empty).  Note that the constraints in
			 * launcher_determine_sleep keep us from starting workers too
			 * quickly (at most once every autovacuum_naptime when the list is
			 * empty).
			 */
			launch_worker(current_time);
		}
		else
		{
			/*
			 * because rebuild_database_list constructs a list with most
			 * distant adl_next_worker first, we obtain our database from the
			 * tail of the list.
			 */
			avl_dbase  *avdb;

			avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);

			/*
			 * launch a worker if next_worker is right now or it is in the
			 * past
			 */
			if (TimestampDifferenceExceeds(avdb->adl_next_worker,
										   current_time, 0))
				launch_worker(current_time);
		}
	}

	/* Normal exit from the autovac launcher is here */
shutdown:
	ereport(DEBUG1,
			(errmsg("autovacuum launcher shutting down")));
	AutoVacuumShmem->av_launcherpid = 0;

	proc_exit(0);				/* done */
}
“PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。