Skip to content

Commit

Permalink
[PBCKP-218] Incremental restore and missing pg_control (issue #304)
Browse files Browse the repository at this point in the history
- pg_control file backup after all other files in backup
- pg_control file restore last in full restore
- rename pg_control to pg_control.pbk.bak at start of non-full restore
- remove pg_control.pbk.bak at the end of a successful non-full restore
- use pg_control.pbk.bak after failed non-full restore
- added tests for full and incremental restore

Tags: backup, catchup, restore
  • Loading branch information
Oleg Gurev committed Dec 22, 2023
1 parent d26df12 commit 52e47fe
Show file tree
Hide file tree
Showing 9 changed files with 354 additions and 83 deletions.
58 changes: 48 additions & 10 deletions src/backup.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
char pretty_time[20];
char pretty_bytes[20];

pgFile *src_pg_control_file = NULL;

elog(INFO, "Database backup start");
if(current.external_dir_str)
{
Expand Down Expand Up @@ -424,6 +426,24 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,

}

/*
* find pg_control file
* We'll copy it last
*/
{
int control_file_elem_index;
pgFile search_key;
MemSet(&search_key, 0, sizeof(pgFile));
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
search_key.rel_path = XLOG_CONTROL_FILE;
search_key.external_dir_num = 0;
control_file_elem_index = parray_bsearch_index(backup_files_list, &search_key, pgFileCompareRelPathWithExternal);

if (control_file_elem_index < 0)
elog(ERROR, "File \"%s\" not found in PGDATA %s", XLOG_CONTROL_FILE, current.database_dir);
src_pg_control_file = (pgFile *)parray_get(backup_files_list, control_file_elem_index);
}

/* setup thread locks */
pfilearray_clear_locks(backup_files_list);

Expand Down Expand Up @@ -483,6 +503,26 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
backup_isok = false;
}

/* copy pg_control at very end */
if (backup_isok)
{

elog(progress ? INFO : LOG, "Progress: Backup file \"%s\"",
src_pg_control_file->rel_path);

char from_fullpath[MAXPGPATH];
char to_fullpath[MAXPGPATH];
join_path_components(from_fullpath, instance_config.pgdata, src_pg_control_file->rel_path);
join_path_components(to_fullpath, current.database_dir, src_pg_control_file->rel_path);

backup_non_data_file(src_pg_control_file, NULL,
from_fullpath, to_fullpath,
current.backup_mode, current.parent_backup,
true);
}



time(&end_time);
pretty_time_interval(difftime(end_time, start_time),
pretty_time, lengthof(pretty_time));
Expand Down Expand Up @@ -510,17 +550,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
{
pgFile *pg_control = NULL;

for (i = 0; i < parray_num(backup_files_list); i++)
{
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
pg_control = src_pg_control_file;

if (tmp_file->external_dir_num == 0 &&
(strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0))
{
pg_control = tmp_file;
break;
}
}

if (!pg_control)
elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
Expand Down Expand Up @@ -2076,6 +2107,13 @@ backup_files(void *arg)
/* We have already copied all directories */
if (S_ISDIR(file->mode))
continue;
/*
* Don't copy the pg_control file now, we'll copy it last
*/
if(file->external_dir_num == 0 && pg_strcasecmp(file->rel_path, XLOG_CONTROL_FILE) == 0)
{
continue;
}

if (arguments->thread_num == 1)
{
Expand Down
46 changes: 43 additions & 3 deletions src/catchup.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,13 @@ catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,

if (current.backup_mode != BACKUP_MODE_FULL)
{
dest_id = get_system_identifier(dest_pgdata, FIO_LOCAL_HOST, false);
ControlFileData dst_control;
get_control_file_or_back_file(dest_pgdata, FIO_LOCAL_HOST, &dst_control);
dest_id = dst_control.system_identifier;

if (source_conn_id != dest_id)
elog(ERROR, "Database identifiers mismatch: we connected to DB id %lu, but in \"%s\" we found id %lu",
source_conn_id, dest_pgdata, dest_id);
elog(ERROR, "Database identifiers mismatch: we connected to DB id %llu, but in \"%s\" we found id %llu",
(long long)source_conn_id, dest_pgdata, (long long)dest_id);
}
}

Expand Down Expand Up @@ -640,6 +643,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
ssize_t transfered_walfiles_bytes = 0;
char pretty_source_bytes[20];

char dest_pg_control_fullpath[MAXPGPATH];
char dest_pg_control_bak_fullpath[MAXPGPATH];

source_conn = catchup_init_state(&source_node_info, source_pgdata, dest_pgdata);
catchup_preflight_checks(&source_node_info, source_conn, source_pgdata, dest_pgdata);

Expand Down Expand Up @@ -935,6 +941,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
Assert(file->external_dir_num == 0);
if (pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
redundant = true;
/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
if (pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
redundant = false;

/* if file does not exists in destination list, then we can safely unlink it */
if (redundant)
Expand Down Expand Up @@ -966,6 +975,28 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
if (dest_filelist)
parray_qsort(dest_filelist, pgFileCompareRelPathWithExternal);

join_path_components(dest_pg_control_fullpath, dest_pgdata, XLOG_CONTROL_FILE);
join_path_components(dest_pg_control_bak_fullpath, dest_pgdata, XLOG_CONTROL_BAK_FILE);
/*
* rename the destination control file (if it exists) before restoring;
* if it does not exist, we are restoring into a pgdata left by a previously
* failed restore, where XLOG_CONTROL_BAK_FILE already exists
*/
if (current.backup_mode != BACKUP_MODE_FULL && !dry_run)
{
if (!fio_access(dest_pg_control_fullpath, F_OK, FIO_LOCAL_HOST))
{
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);

if(!fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST))
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
}

/* run copy threads */
elog(INFO, "Start transferring data files");
time(&start_time);
Expand All @@ -985,6 +1016,15 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
copy_pgcontrol_file(from_fullpath, FIO_DB_HOST,
to_fullpath, FIO_LOCAL_HOST, source_pg_control_file);
transfered_datafiles_bytes += source_pg_control_file->size;

/* Now backup control file can be deleted */
if (current.backup_mode != BACKUP_MODE_FULL && !fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST)){
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
}

if (!catchup_isok && !dry_run)
Expand Down
2 changes: 1 addition & 1 deletion src/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1867,4 +1867,4 @@ set_forkname(pgFile *file)
file->segno = segno;
file->is_datafile = file->forkName == none;
return true;
}
}
3 changes: 3 additions & 0 deletions src/pg_probackup.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ extern const char *PROGRAM_EMAIL;
#define DATABASE_MAP "database_map"
#define HEADER_MAP "page_header_map"
#define HEADER_MAP_TMP "page_header_map_tmp"
#define XLOG_CONTROL_BAK_FILE XLOG_CONTROL_FILE".pbk.bak"

/* default replication slot names */
#define DEFAULT_TEMP_SLOT_NAME "pg_probackup_slot";
Expand Down Expand Up @@ -1209,6 +1210,8 @@ extern uint32 get_xlog_seg_size(const char *pgdata_path);
extern void get_redo(const char *pgdata_path, fio_location pgdata_location, RedoParams *redo);
extern void set_min_recovery_point(pgFile *file, const char *backup_path,
XLogRecPtr stop_backup_lsn);
extern void get_control_file_or_back_file(const char *pgdata_path, fio_location location,
ControlFileData *control);
extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location,
const char *to_fullpath, fio_location to_location, pgFile *file);

Expand Down
83 changes: 82 additions & 1 deletion src/restore.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ typedef struct
int ret;
} restore_files_arg;

static bool control_downloaded = false;
static ControlFileData instance_control;

static void
print_recovery_settings(InstanceState *instanceState, FILE *fp, pgBackup *backup,
Expand Down Expand Up @@ -501,6 +503,9 @@ do_restore_or_validate(InstanceState *instanceState, time_t target_backup_id, pg
if (redo.checksum_version == 0)
elog(ERROR, "Incremental restore in 'lsn' mode require "
"data_checksums to be enabled in destination data directory");
if (!control_downloaded)
get_control_file_or_back_file(instance_config.pgdata, FIO_DB_HOST,
&instance_control);

timelines = read_timeline_history(instanceState->instance_wal_subdir_path,
redo.tli, false);
Expand Down Expand Up @@ -719,6 +724,10 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
parray *pgdata_files = NULL;
parray *dest_files = NULL;
parray *external_dirs = NULL;
pgFile *dest_pg_control_file = NULL;
char dest_pg_control_fullpath[MAXPGPATH];
char dest_pg_control_bak_fullpath[MAXPGPATH];

/* arrays with meta info for multi threaded backup */
pthread_t *threads;
restore_files_arg *threads_args;
Expand Down Expand Up @@ -922,6 +931,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
redundant = true;

/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
if (file->external_dir_num == 0 &&
pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
redundant = false;

/* do not delete the useful internal directories */
if (S_ISDIR(file->mode) && !redundant)
continue;
Expand Down Expand Up @@ -974,6 +988,42 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
dest_bytes = dest_backup->pgdata_bytes;

pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes));
/*
* [Issue #313]
* find pg_control file (in already sorted earlier dest_files, see parray_qsort(backup->files...))
* and exclude it from list for future special processing
*/
{
int control_file_elem_index;
pgFile search_key;
MemSet(&search_key, 0, sizeof(pgFile));
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
search_key.rel_path = XLOG_CONTROL_FILE;
search_key.external_dir_num = 0;
control_file_elem_index = parray_bsearch_index(dest_files, &search_key, pgFileCompareRelPathWithExternal);

if (control_file_elem_index < 0)
elog(ERROR, "File \"%s\" not found in backup %s", XLOG_CONTROL_FILE, base36enc(dest_backup->start_time));
dest_pg_control_file = (pgFile *) parray_get(dest_files, control_file_elem_index);
parray_remove(dest_files, control_file_elem_index);

join_path_components(dest_pg_control_fullpath, pgdata_path, XLOG_CONTROL_FILE);
join_path_components(dest_pg_control_bak_fullpath, pgdata_path, XLOG_CONTROL_BAK_FILE);
/*
* rename the destination control file (if it exists) before restoring;
* if it does not exist, we are restoring into a pgdata left by a previously
* failed restore, where XLOG_CONTROL_BAK_FILE already exists
*/
if (params->incremental_mode != INCR_NONE)
{
if (fio_access(dest_pg_control_fullpath,F_OK,FIO_DB_HOST) == 0){
if (fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_DB_HOST) < 0)
elog(WARNING, "Cannot rename file \"%s\" to \"%s\": %s",
dest_pg_control_fullpath, dest_pg_control_bak_fullpath, strerror(errno));
}
}
}

elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes);
time(&start_time);
thread_interrupted = false;
Expand Down Expand Up @@ -1014,6 +1064,32 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
total_bytes += threads_args[i].restored_bytes;
}

/* [Issue #313] copy pg_control at very end */
if (restore_isok)
{
FILE *out = NULL;
elog(progress ? INFO : LOG, "Progress: Restore file \"%s\"",
dest_pg_control_file->rel_path);

out = fio_fopen(dest_pg_control_fullpath, PG_BINARY_R "+", FIO_DB_HOST);

total_bytes += restore_non_data_file(parent_chain,
dest_backup,
dest_pg_control_file,
out,
dest_pg_control_fullpath, false);
fio_fclose(out);
/* Now backup control file can be deleted */
if (params->incremental_mode != INCR_NONE)
{
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
}

time(&end_time);
pretty_time_interval(difftime(end_time, start_time),
pretty_time, lengthof(pretty_time));
Expand Down Expand Up @@ -1098,6 +1174,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
parray_free(pgdata_files);
}

if(dest_pg_control_file) pgFileFree(dest_pg_control_file);

for (i = parray_num(parent_chain) - 1; i >= 0; i--)
{
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
Expand Down Expand Up @@ -2230,7 +2308,10 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier,
*/
elog(LOG, "Trying to read pg_control file in destination directory");

system_id_pgdata = get_system_identifier(pgdata, FIO_DB_HOST, false);
get_control_file_or_back_file(pgdata, FIO_DB_HOST, &instance_control);
control_downloaded = true;

system_id_pgdata = instance_control.system_identifier;

if (system_id_pgdata == instance_config.system_identifier)
system_id_match = true;
Expand Down
20 changes: 20 additions & 0 deletions src/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,26 @@ get_current_timeline_from_control(const char *pgdata_path, fio_location location
return ControlFile.checkPointCopy.ThisTimeLineID;
}

/*
 * Read and parse the control file of the data directory at pgdata_path.
 *
 * XLOG_CONTROL_FILE is tried first. If it cannot be read, or it is read but
 * is empty (size == 0), XLOG_CONTROL_BAK_FILE is read instead. If the
 * fallback cannot be read either, an ERROR is reported through elog().
 *
 * pgdata_path - data directory that holds the control file(s)
 * location    - passed through to slurpFile() to select where the file lives
 * control     - output; filled in by digestControlFile() from the bytes read
 */
void
get_control_file_or_back_file(const char *pgdata_path, fio_location location, ControlFileData *control)
{
	char	   *buffer;
	size_t		size = 0;

	/*
	 * First fetch the primary control file.
	 * NOTE(review): the 'true' argument presumably selects a non-failing
	 * ("safe") read that returns NULL on error -- confirm against slurpFile().
	 */
	buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, true, location);

	if (!buffer || size == 0)
	{
		/*
		 * The primary file was unreadable or empty. An empty read can still
		 * hand back a non-NULL buffer; release it before the pointer is
		 * overwritten by the fallback read, otherwise it would leak.
		 */
		if (buffer != NULL)
			pg_free(buffer);

		/* Error read XLOG_CONTROL_FILE or file is truncated, trying read backup */
		buffer = slurpFile(pgdata_path, XLOG_CONTROL_BAK_FILE, &size, true, location);
		if (!buffer)
			elog(ERROR, "Could not read %s and %s files\n", XLOG_CONTROL_FILE, XLOG_CONTROL_BAK_FILE); /* Maybe it should be PANIC? */
	}

	digestControlFile(control, buffer, size);
	pg_free(buffer);
}


/*
* Get last check point record ptr from pg_control.
*/
Expand Down
2 changes: 1 addition & 1 deletion tests/helpers/ptrack_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1783,7 +1783,7 @@ def pgdata_content(self, pgdata, ignore_ptrack=True, exclude_dirs=None):
'ptrack_control', 'ptrack_init', 'pg_control',
'probackup_recovery.conf', 'recovery.signal',
'standby.signal', 'ptrack.map', 'ptrack.map.mmap',
'ptrack.map.tmp'
'ptrack.map.tmp', 'recovery.done','backup_label.old'
]

if exclude_dirs:
Expand Down
Loading

0 comments on commit 52e47fe

Please sign in to comment.