本篇内容介绍了“怎么理解PostgreSQL的后台进程autovacuum”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
AutoVacuumShmem
autovacuum的主共享内存结构体。共享内存中存放的是该主结构体以及WorkerInfo结构体数组。
/*-------------
 * The main autovacuum shmem struct.  On shared memory we store this main
 * struct and the array of WorkerInfo structs.  This struct keeps:
 *
 * av_signal          set by other processes to indicate various conditions
 * av_launcherpid     the PID of the autovacuum launcher
 * av_freeWorkers     the WorkerInfo freelist
 * av_runningWorkers  the WorkerInfo non-free queue
 * av_startingWorker  pointer to WorkerInfo currently being started (cleared
 *                    by the worker itself as soon as it's up and running)
 * av_workItems       work item array
 *
 * This struct is protected by AutovacuumLock, except for av_signal and parts
 * of the worker list (see above).
 *
 * NOTE(review): "see above" refers to commentary that is not part of this
 * excerpt; consult the WorkerInfo definition for the exact locking rules.
 *-------------
 */
typedef struct
{
/* one flag per signal kind; array length is AutoVacNumSignals */
sig_atomic_t av_signal[AutoVacNumSignals];
/* PID of the autovacuum launcher */
pid_t av_launcherpid;
/* list of free WorkerInfo entries */
dlist_head av_freeWorkers;
/* list of WorkerInfo entries that are in use */
dlist_head av_runningWorkers;
/* the WorkerInfo currently being started, per the header comment */
WorkerInfo av_startingWorker;
/* fixed-size array of work items; length is NUM_WORKITEMS */
AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
} AutoVacuumShmemStruct;
/*
 * File-local pointer to the AutoVacuumShmemStruct instance.  Where it is
 * assigned is not shown in this excerpt.
 */
static AutoVacuumShmemStruct *AutoVacuumShmem;
avw_dbase
用于跟踪worker中的数据库的结构体
/*
 * struct to keep track of databases in worker
 *
 * rebuild_database_list() reads adw_datid from the entries returned by
 * get_database_list() and uses it as the key for pgstat and hash lookups.
 * The remaining fields are not referenced in this excerpt; the notes on them
 * below are assumptions to be confirmed against the code that fills them in.
 */
typedef struct avw_dbase
{
/* OID of the database */
Oid adw_datid;
/* database name (presumably a palloc'd C string -- TODO confirm) */
char *adw_name;
/* NOTE(review): looks like the database's frozen-XID horizon -- confirm */
TransactionId adw_frozenxid;
/* NOTE(review): looks like the database's minimum MultiXactId -- confirm */
MultiXactId adw_minmulti;
/* NOTE(review): presumably the database's pgstat entry, if any -- confirm */
PgStat_StatDBEntry *adw_entry;
} avw_dbase;
rebuild_database_list用于构建出现变化后的DatabaseList,链表中的数据库应出现在pgstats中,在autovacuum_naptime所设定的时间间隔范围内均匀分布。
比如autovacuum_naptime = 60s,有4个数据库db1->db4,那么每隔60s/4 = 15s就会启动一个autovacuum worker对相应的DB进行处理。
可能的一个处理时间序列是:db1->XX(时):XX(分):18(秒),db2->XX:XX:33,db3->XX:XX:48,db4->XX:XX:03
后续如需要对db1->db4进行vacuum,那么db1->db4会在下一个18秒、33秒、48秒和03秒触发autovacuum。
/*
 * Build an updated DatabaseList.  It must only contain databases that appear
 * in pgstats, and must be sorted by next_worker from highest to lowest,
 * distributed regularly across the next autovacuum_naptime interval.
 *
 * For example, with autovacuum_naptime = 60s and four databases, consecutive
 * entries are scheduled 60s / 4 = 15s apart, e.g. at XX:XX:18, XX:XX:33,
 * XX:XX:48 and XX:XX:03.
 *
 * Receives the Oid of the database that made this list be generated (we call
 * this the "new" database, because when the database was already present on
 * the list, we expect that this function is not called at all).  The
 * preexisting list, if any, will be used to preserve the order of the
 * databases in the autovacuum_naptime period.  The new database is put at the
 * end of the interval.  The actual values are not saved, which should not be
 * much of a problem.
 *
 * newdb may be InvalidOid, in which case no "new" database is inserted and
 * only the existing list and get_database_list() contribute entries.
 *
 * Side effects: replaces the contents of the global DatabaseList, deletes the
 * previous DatabaseListCxt (if any) and installs a freshly created context in
 * its place.  The caller's memory context is restored before returning.
 */
static void
rebuild_database_list(Oid newdb)
{
    List       *dblist;
    ListCell   *cell;
    MemoryContext newcxt;
    MemoryContext oldcxt;
    MemoryContext tmpcxt;
    HASHCTL     hctl;
    int         score;
    int         nelems;
    HTAB       *dbhash;
    dlist_iter  iter;

    /* use fresh stats */
    autovac_refresh_stats();

    /*
     * newcxt will own the rebuilt list; tmpcxt is its child and holds the
     * scratch hash table and database list, so it can be dropped on its own
     * once the list has been built.
     */
    newcxt = AllocSetContextCreate(AutovacMemCxt,
                                   "AV dblist",
                                   ALLOCSET_DEFAULT_SIZES);
    tmpcxt = AllocSetContextCreate(newcxt,
                                   "tmp AV dblist",
                                   ALLOCSET_DEFAULT_SIZES);
    oldcxt = MemoryContextSwitchTo(tmpcxt);

    /*
     * Implementing this is not as simple as it sounds, because we need to put
     * the new database at the end of the list; next the databases that were
     * already on the list, and finally (at the tail of the list) all the
     * other databases that are not on the existing list.
     *
     * To do this, we build an empty hash table of scored databases.  We will
     * start with the lowest score (zero) for the new database, then
     * increasing scores for the databases in the existing list, in order, and
     * lastly increasing scores for all databases gotten via
     * get_database_list() that are not already on the hash.
     *
     * Then we will put all the hash elements into an array, sort the array by
     * score, and finally put the array elements into the new doubly linked
     * list.
     */
    hctl.keysize = sizeof(Oid);
    hctl.entrysize = sizeof(avl_dbase);
    hctl.hcxt = tmpcxt;
    dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
                         HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);

    /* start by inserting the new database; scores start at zero */
    score = 0;
    if (OidIsValid(newdb))
    {
        avl_dbase  *db;
        PgStat_StatDBEntry *entry;

        /* only consider this database if it has a pgstat entry */
        entry = pgstat_fetch_stat_dbentry(newdb);
        if (entry != NULL)
        {
            /* we assume it isn't found because the hash was just created */
            db = hash_search(dbhash, &newdb, HASH_ENTER, NULL);

            /* hash_search already filled in the key */
            db->adl_score = score++;
            /* next_worker is filled in later */
        }
    }

    /* Now insert the databases from the existing list into the hash */
    dlist_foreach(iter, &DatabaseList)
    {
        avl_dbase  *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
        avl_dbase  *db;
        bool        found;
        PgStat_StatDBEntry *entry;

        /*
         * skip databases with no stat entries -- in particular, this gets rid
         * of dropped databases
         */
        entry = pgstat_fetch_stat_dbentry(avdb->adl_datid);
        if (entry == NULL)
            continue;

        db = hash_search(dbhash, &(avdb->adl_datid), HASH_ENTER, &found);

        /* an entry already present (the new database) keeps its score */
        if (!found)
        {
            /* hash_search already filled in the key */
            db->adl_score = score++;
            /* next_worker is filled in later */
        }
    }

    /* finally, insert all qualifying databases not previously inserted */
    dblist = get_database_list();
    foreach(cell, dblist)
    {
        avw_dbase  *avdb = lfirst(cell);
        avl_dbase  *db;
        bool        found;
        PgStat_StatDBEntry *entry;

        /* only consider databases with a pgstat entry */
        entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
        if (entry == NULL)
            continue;

        db = hash_search(dbhash, &(avdb->adw_datid), HASH_ENTER, &found);

        /* only update the score if the database was not already on the hash */
        if (!found)
        {
            /* hash_search already filled in the key */
            db->adl_score = score++;
            /* next_worker is filled in later */
        }
    }

    /* score was bumped once per inserted entry, so it is the entry count */
    nelems = score;

    /* from here on, the allocated memory belongs to the new list */
    MemoryContextSwitchTo(newcxt);
    dlist_init(&DatabaseList);

    if (nelems > 0)
    {
        TimestampTz current_time;
        int         millis_increment;
        avl_dbase  *dbary;
        avl_dbase  *db;
        HASH_SEQ_STATUS seq;
        int         i;

        /* put all the hash elements into an array */
        dbary = palloc(nelems * sizeof(avl_dbase));

        i = 0;
        hash_seq_init(&seq, dbhash);
        while ((db = hash_seq_search(&seq)) != NULL)
            memcpy(&(dbary[i++]), db, sizeof(avl_dbase));

        /* sort the array; the ordering is defined by db_comparator */
        qsort(dbary, nelems, sizeof(avl_dbase), db_comparator);

        /*
         * Determine the time interval between databases in the schedule
         * (autovacuum_naptime divided by the number of databases, in
         * milliseconds).  If we see that the configured naptime would take us
         * to sleep times lower than our min sleep time (which
         * launcher_determine_sleep is coded not to allow), silently use a
         * larger naptime (but don't touch the GUC variable).
         */
        millis_increment = 1000.0 * autovacuum_naptime / nelems;
        if (millis_increment <= MIN_AUTOVAC_SLEEPTIME)
            millis_increment = MIN_AUTOVAC_SLEEPTIME * 1.1;

        current_time = GetCurrentTimestamp();

        /*
         * move the elements from the array into the dlist, setting the
         * next_worker while walking the array
         */
        for (i = 0; i < nelems; i++)
        {
            /*
             * Reuse the block-level "db" (no longer needed after the hash
             * scan above) instead of declaring a second variable of the same
             * name that would shadow it.
             */
            db = &(dbary[i]);

            current_time = TimestampTzPlusMilliseconds(current_time,
                                                       millis_increment);
            db->adl_next_worker = current_time;

            /* later elements should go closer to the head of the list */
            dlist_push_head(&DatabaseList, &db->adl_node);
        }
    }

    /* all done, clean up memory */
    if (DatabaseListCxt != NULL)
        MemoryContextDelete(DatabaseListCxt);
    MemoryContextDelete(tmpcxt);
    DatabaseListCxt = newcxt;
    MemoryContextSwitchTo(oldcxt);
}
启动gdb,设置信号处理,设置断点
(gdb) b rebuild_database_list
Breakpoint 1 at 0x82eb2a: file autovacuum.c, line 931.
(gdb) handle SIGINT print nostop pass
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n) y
Signal Stop Print Pass to program Description
SIGINT No Yes Yes Interrupt
(gdb) c
Continuing.
^C
Program received signal SIGINT, Interrupt.
进入断点
Breakpoint 1, rebuild_database_list (newdb=0) at autovacuum.c:931
931 autovac_refresh_stats();
(gdb) n
933 newcxt = AllocSetContextCreate(AutovacMemCxt,
(gdb)
936 tmpcxt = AllocSetContextCreate(newcxt,
(gdb)
939 oldcxt = MemoryContextSwitchTo(tmpcxt);
(gdb)
957 hctl.keysize = sizeof(Oid);
(gdb)
958 hctl.entrysize = sizeof(avl_dbase);
(gdb)
959 hctl.hcxt = tmpcxt;
查看统计信息文件:pg_stat_tmp/global.stat
(gdb) p *pgstat_stat_filename
$1 = 112 'p'
(gdb) p pgstat_stat_filename
$2 = 0x203d7e0 "pg_stat_tmp/global.stat"
(gdb) n
960 dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
(gdb)
###
[pg12@localhost pg_stat_tmp]$ pwd
/data/pgsql/pg121db/pg_stat_tmp
[pg12@localhost pg_stat_tmp]$ ll
total 4
-rw------- 1 pg12 pg12 237 Dec 11 16:40 global.stat
[pg12@localhost pg_stat_tmp]$
构建需处理的数据库链表
964 score = 0;
(gdb)
965 if (OidIsValid(newdb))
(gdb) p *hctl
Structure has no component named operator*.
(gdb) p hctl
$3 = {num_partitions = 140725872814104, ssize = 34131296, dsize = 32, max_dsize = 0, ffactor = 257, keysize = 4, entrysize = 40, hash = 0xc6afd3, match = 0x208cd60, keycopy = 0x0, alloc = 0x1, hcxt = 0x2090d80,
hctl = 0xfe3a00 <ConfigureNamesString+4864>}
(gdb) n
984 dlist_foreach(iter, &DatabaseList)
(gdb) p *DatabaseList
Structure has no component named operator*.
(gdb) p DatabaseList
$4 = {head = {prev = 0xfd9880 <DatabaseList>, next = 0xfd9880 <DatabaseList>}}
(gdb) n
1010 dblist = get_database_list();
(gdb)
1011 foreach(cell, dblist)
(gdb) p *dblist
$5 = {type = T_List, length = 7, head = 0x2090ef8, tail = 0x2091240}
(gdb) p *dblist->head
$6 = {data = {ptr_value = 0x2090e98, int_value = 34147992, oid_value = 34147992}, next = 0x2090fb0}
(gdb) p *(Node *)dblist->head->data.ptr_value
$7 = {type = 13591}
(gdb) p *dblist->head->data.ptr_value
Attempt to dereference a generic pointer.
(gdb) n
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
如没有统计信息,则不予处理
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb) p *avdb
$8 = {adw_datid = 13591, adw_name = 0x2090ed0 "postgres", adw_frozenxid = 479, adw_minmulti = 1, adw_entry = 0x0}
(gdb) n
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb) p *avdb
$9 = {adw_datid = 16384, adw_name = 0x2090f90 "testdb", adw_frozenxid = 2921, adw_minmulti = 1, adw_entry = 0x0}
(gdb) step
pgstat_fetch_stat_dbentry (dbid=16384) at pgstat.c:2438
2438 backend_read_statsfile();
(gdb) step
backend_read_statsfile () at pgstat.c:5644
5644 TimestampTz min_ts = 0;
(gdb) n
5645 TimestampTz ref_ts = 0;
(gdb)
5650 if (pgStatDBHash)
(gdb)
5651 return;
(gdb)
5766 }
(gdb)
pgstat_fetch_stat_dbentry (dbid=16384) at pgstat.c:2443
2443 return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
(gdb)
2446 }
(gdb)
rebuild_database_list (newdb=0) at autovacuum.c:1020
1020 if (entry == NULL)
(gdb) n
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb)
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb)
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb)
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb)
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1013 avw_dbase *avdb = lfirst(cell);
(gdb)
1019 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
(gdb)
1020 if (entry == NULL)
(gdb)
1011 foreach(cell, dblist)
(gdb)
1032 nelems = score;
(gdb)
1035 MemoryContextSwitchTo(newcxt);
(gdb) n
1036 dlist_init(&DatabaseList);
(gdb)
没有任何数据库被加入哈希表(nelems = 0),因此无需构建链表,直接清理内存上下文后返回
1038 if (nelems > 0)
(gdb) p nelems
$10 = 0
(gdb) n
1089 if (DatabaseListCxt != NULL)
(gdb)
1091 MemoryContextDelete(tmpcxt);
(gdb)
1092 DatabaseListCxt = newcxt;
(gdb)
1093 MemoryContextSwitchTo(oldcxt);
(gdb)
1094 }
(gdb)
AutoVacLauncherMain (argc=0, argv=0x0) at autovacuum.c:625
625 while (!got_SIGTERM)
(gdb)
“怎么理解PostgreSQL的后台进程autovacuum”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!
亿速云「云服务器」,即开即用、新一代英特尔至强铂金CPU、三副本存储NVMe SSD云盘,价格低至29元/月。点击查看>>
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
原文链接:http://blog.itpub.net/6906/viewspace-2668119/