#!/usr/bin/perl
#
# Content Based Restore Script
# http://256.com/gray/docs/content_based_backup/
#
# Copyright 2006 by Gray Watson
#
# Permission to use, copy, modify, and distribute this software for
# any purpose and without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies, and that the name of Gray Watson not be used in advertising
# or publicity pertaining to distribution of the document or software
# without specific, written prior permission.
#
# Gray Watson makes no representations about the suitability of the
# software described herein for any purpose.  It is provided "as is"
# without express or implied warranty.
#
# The author may be contacted via http://256.com/gray/
#
# $Id: restore.pl,v 1.15 2006/09/14 15:48:40 gray Exp $
#

###############################################################################
#
# USAGE MESSAGE:

#
# Print the usage message to STDERR and exit with status 1.
#
# $arg - optional description of the offending argument.  When it is
#        missing or empty (for example when help was requested) the
#        "invalid argument usage" line is left out instead of being
#        printed with nothing after it.
#
# This routine never returns.
#
sub usage {
    my ($arg) = @_;

    print STDERR "$0: invalid argument usage: $arg\n\n"
        if (defined $arg && length $arg);

    print STDERR <<"END_USAGE";
Usage: $0 [-a] [-b id] [-c dir] [-d database] [-D] [-p db-port]
          [-P password] [-u username] file-pattern1 ...

    -a              Restore older versions of all files as file.YYYYMMDD.BBB
    -b backup-id    Number of backup we want to recover from.  Latest if none.
    -c dir          Master content directory.
    -d database     Database to connect to.
    -D              Turn on debug output.
    -p port         Port to connect to the database.
    -P password     Password to use to connect to the database.
    -u username     Username to use to connect to the database.
    --help          Show this usage message.
    file-pattern1   File pattern to lookup and recover.
END_USAGE

    exit 1;
}

###############################################################################
#
# BACKGROUND:
#
# This script restores files that were previously backed up by the
# content-based backup script.  It takes a series of SQL LIKE patterns
# and will restore files into the current directory.
#
###############################################################################
#
# NORMAL USAGE:
#
# restore.pl -c /backup/host/CONTENT %var/log/syslog%
#
# This will check the files table in the backup database, find all
# files which match the SQL LIKE pattern '%var/log/syslog%', and
# restore them to the current directory.  If it matches the following
# files and directories:
#
#     /usr/var/log/syslog.txt
#     /usr/var/log/syslog.txt~
#     /usr/local/var/log/syslog/auth.txt
#
# Then it will create the usr/var/log and usr/local/var/log/syslog
# directories in the current directory, and will restore the files
# into the directories.  It will try to recover the owner, group, and
# proper modes of the files if the caller has the permissions to do
# so.  You will probably want to run restore as root.
#
# If you want to recover from a specific backup instead of the latest
# one, then use the (-b #) argument where you specify the number of
# the backup from the database.
#
# You can also specify the -a (all versions) flag which will restore
# the latest version of the file as 'file' but older versions as
# 'file.YYYYMMDD.BBB' with the date and the backup number appended to
# the filename.  This will allow you to see how the file has changed
# over time and choose the proper version to restore.
#
###############################################################################

use strict;
use warnings;

use Digest::SHA2;
use Fcntl ':mode';
use PerlIO::gzip;
use Getopt::Long;
use IO::Handle;
use Unix::Mknod qw{makedev mknod major minor};
use DBI;

#
# some constants that can be configured with runtime args
#

# short hostname of this system; it is only used to build the default
# content directory below (override the directory itself with -c)
chomp(my $hostname = `hostname -s`);
# directory of content files, overridden with -c
my $content_dir = "/usr2/backup/$hostname/CONTENT";
# name of the database that holds our backup information, overridden with -d
my $database_name = "backup";
# port that the database is running on, overridden with -p
my $database_port = 5433;
# username to use to connect to the database, overridden with -u
my $database_username = "gray";
# password to use to connect to the database, overridden with -P
my $database_password = "";

# some global variables
my $db_conn;
my $all_versions_b = 0;
my $debug_b = 0;
# name of the computed column holding the file mtime as epoch seconds
my $epoch_field = "epoch";

# global which holds the paths that we have restored and their signatures
my %restore_path_signatures;

###############################################################################

#
# Create every directory leading up to $path.
#
# $path   - slash separated path, relative or absolute
# $file_b - true if the last component is a file name that must not be
#           created as a directory
#
# Directories that already exist are fine; any other mkdir failure is
# fatal.
#
sub make_path {
    my ($path, $file_b) = @_;

    my @dirs = split(/\//, $path);
    # remove the last entry which should be the file
    pop(@dirs) if $file_b;

    # keep absolute paths absolute
    my $new_dir = ($path =~ m{^/}) ? '/' : '';
    # empty components come from leading or doubled slashes
    for my $dir (grep { length } @dirs) {
        # test the length, not the truth, so that a directory named "0"
        # still gets its separator
        $new_dir .= '/' if (length($new_dir) && $new_dir !~ m{/$});
        $new_dir .= $dir;
        die "Could not create $new_dir: $!"
            unless (mkdir($new_dir) || $!{EEXIST});
    }
}

#
# try to get the owner, modes and modification time to match the backup
#
# $path - restored path to adjust
# $row  - database row for the entry (mode and epoch fields are used)
# $uid  - numeric owner id, or undef if the owner is not known here
# $gid  - numeric group id, or undef if the group is not known here
#
# All of this is best effort: failures (for instance when not running
# as root) are ignored on purpose.
#
sub fix_modes {
    my ($path, $row, $uid, $gid) = @_;

    # Change the ownership first: chown() may clear the setuid/setgid
    # bits, so the chmod has to come afterwards.  An id that could not be
    # resolved is passed as -1, which leaves it unchanged instead of
    # silently handing the file to id 0.
    if (defined $uid || defined $gid) {
        chown(defined $uid ? $uid : -1, defined $gid ? $gid : -1, $path);
    }

    chmod($row->{mode} & 07777, $path)
        if ($row->{mode} && ($row->{mode} & 07777));

    # access time becomes "now", modification time comes from the backup
    utime(time, $row->{$epoch_field}, $path) if $row->{$epoch_field};
}

#
# Restore the content of a regular file.
#
# $row      - database row for the file
# $new_path - relative path to restore the file to
#
# Returns the path that was written, which has a .YYYYMMDD.BBB suffix
# when an older version is restored next to an existing file (-a), or
# undef if the file was skipped or could not be restored.  Read and
# write errors in the middle of the copy are fatal.
#
sub restore_file {
    my ($row, $new_path) = @_;

    # the content signature is also the location of the content file:
    # the first two pairs of characters are directory levels
    if (!defined $row->{content} || $row->{content} !~ m,^(..)(..)(...*)$,) {
        print "Unknown content form for '$row->{path}' in backup "
            . "'$row->{backup}'\n";
        return;
    }
    my $src_path = "$content_dir/$1/$2/$3";

    if (-e $new_path) {
        # if we are dumping all the versions of the files and we have not
        # dumped this version before then give it a versioned name
        if ($all_versions_b
            && (! $restore_path_signatures{$row->{path} . $row->{content}})) {
            # tack onto the name the YYYYMMDD if available
            if ($row->{$epoch_field}) {
                my ($day, $mon, $year)
                    = (localtime($row->{$epoch_field}))[3, 4, 5];
                $new_path .= sprintf(".%04d%02d%02d",
                                     $year + 1900, $mon + 1, $day);
            }
            # also tack on the backup number which should be unique
            $new_path .= ".$row->{backup}";
        }
        else {
            print " File '$row->{path}' skipped from backup $row->{backup}\n"
                if $debug_b;
            return;
        }
    }

    # the content is stored either as-is or gzipped with a .gz extension
    my $SRC;
    if (-f $src_path) {
        if (not open($SRC, "<", $src_path)) {
            warn "Could not open content '$src_path': $!";
            return;
        }
    }
    else {
        $src_path .= '.gz';
        if (! -f $src_path) {
            print "Missing file '$src_path'\n";
            return;
        }
        if (not open($SRC, "<:gzip", $src_path)) {
            warn "Could not open gzipped content '$src_path': $!";
            return;
        }
    }

    warn "Overwriting previous version of '$new_path'\n" if -e $new_path;

    # try to open the destination, creating its directories on demand
    my $DEST;
    if (not open($DEST, '>', $new_path)) {
        make_path($new_path, 1);
        if (not open($DEST, '>', $new_path)) {
            warn "Could not open new path '$new_path': $!";
            return;
        }
    }
    binmode($DEST);

    my $sig = Digest::SHA2->new(256);
    while (1) {
        # we cannot use sysread here because of gzip
        my $size = read($SRC, my $buf, 10240);
        die "Read from content file failed: $!" unless defined($size);
        last unless $size;
        $sig->add($buf);
        # a short or failed write would leave a silently truncated file
        print {$DEST} $buf
            or die "Write to '$new_path' failed: $!";
    }
    close($SRC);
    # buffered write errors only show up when the handle is closed
    close($DEST)
        or die "Could not finish writing '$new_path': $!";

    my $sig_check = $sig->hexdigest();
    print "File $row->{path} signature does not match db\n"
        unless $sig_check eq $row->{content};

    print "Restored $new_path with sig $row->{content}\n";

    # note that we have restored this path/signature combo
    $restore_path_signatures{$row->{path} . $row->{content}}++;

    return $new_path;
}

#
# Restore a directory.  Returns 1 if it was created, 0 if something
# already exists at that path.
#
sub restore_directory {
    my ($row, $new_path) = @_;

    # skip dups
    return 0 if -e $new_path;

    # just make our directory
    make_path($new_path, 0);
    chmod($row->{mode} & 07777, $new_path) if $row->{mode};
    return 1;
}

#
# Restore a symbolic link.  Returns 1 if it was created, 0 if the row
# is not usable or something already exists at that path.  Failure to
# create the link is fatal.
#
sub restore_symlink {
    my ($row, $new_path) = @_;

    # test for an empty value so that a link pointing at "0" is accepted
    if (!(defined $row->{linkpath} && length $row->{linkpath})) {
        warn "'$row->{path}' in backup '$row->{backup}' is symlink "
            . "but no linkpath\n";
        return 0;
    }
    if (($row->{size} || 0) != length($row->{linkpath})) {
        warn "'$row->{path}' in backup '$row->{backup}' linkpath "
            . "does not match size $row->{size}\n";
        return 0;
    }

    # skip dups, -l also catches dangling links
    return 0 if (-e $new_path || -l $new_path);

    make_path($new_path, 1);
    die "Could not create symlink from $row->{linkpath} to $new_path: $!\n"
        unless symlink($row->{linkpath}, $new_path);
    return 1;
}

#
# Restore a block or character device node.  Returns 1 if it was
# created, 0 if the row is not usable or something already exists at
# that path.  Failure to create the node is fatal.
#
sub restore_device {
    my ($row, $new_path) = @_;

    # skip dups, -l also catches dangling links
    return 0 if (-e $new_path || -l $new_path);

    if (not (defined $row->{major} && defined $row->{minor})) {
        warn "'$row->{path}' in backup '$row->{backup}' is device "
            . "but no major or minor field\n";
        return 0;
    }

    # mknod needs the file type bits to know which kind of node to make;
    # without them it cannot create a device at all.
    # NOTE(review): assumes the mode column carries the st_mode type
    # bits -- confirm against the backup script.
    my $mode = $row->{mode} || 0;
    if (not (S_ISBLK($mode) || S_ISCHR($mode))) {
        warn "'$row->{path}' in backup '$row->{backup}' is device "
            . "but its mode does not say block or character\n";
        return 0;
    }

    # make a st_rdev number
    my $rdev = makedev($row->{major}, $row->{minor});

    make_path($new_path, 1);

    # The node is created owner-read-only; fix_modes applies the real
    # permissions afterwards.  Success is checked on the filesystem so
    # that we do not depend on the return convention of mknod.
    # NOTE(review): Unix::Mknod appears to return 0 on success -- confirm.
    mknod($new_path, S_IFMT($mode) | 0400, $rdev);
    my $mknod_error = "$!";
    die "Could not restore device node '$new_path': $mknod_error"
        unless (-b $new_path || -c $new_path);
    return 1;
}

#
# Restore a zero-length file.  Returns 1 if it was created, 0 if
# something already exists at that path.  Failure to create the file is
# fatal.
#
sub restore_zero {
    my ($row, $new_path) = @_;

    # the row should describe a zero-length file
    if ($row->{size}) {
        print "'$row->{path}' in backup '$row->{backup}' is zero-file "
            . "but size '$row->{size}'\n";
    }

    # skip dups
    return 0 if -e $new_path;

    make_path($new_path, 1);

    my $DEST;
    die "Could not open new path '$new_path': $!"
        unless open($DEST, '>', $new_path);

    # that is all we need to do
    close($DEST);
    return 1;
}

#
# Restore one database row by dispatching on its type:
#   1 file, 2 directory, 3 symlink, 4 device, 5 zero-length file
#
# $row  - database row for the entry
# $uid  - numeric owner id, or undef if not known
# $gid  - numeric group id, or undef if not known
#
sub restore_entry {
    my ($row, $uid, $gid) = @_;

    print " Restoring '$row->{path}' type '$row->{type}' from "
        . "backup $row->{backup}\n" if $debug_b;

    my $new_path = $row->{path};
    # remove any leading /s so that we restore under the current directory
    $new_path =~ s,^/+,,;

    if ($row->{type} == 1) {
        # restore_file may pick a versioned name so use what it returns
        $new_path = restore_file($row, $new_path);
        fix_modes($new_path, $row, $uid, $gid) if $new_path;
    }
    elsif ($row->{type} == 2) {
        if (restore_directory($row, $new_path)) {
            fix_modes($new_path, $row, $uid, $gid);
        }
    }
    elsif ($row->{type} == 3) {
        # modes are not applied to symlinks
        restore_symlink($row, $new_path);
    }
    elsif ($row->{type} == 4) {
        if (restore_device($row, $new_path)) {
            fix_modes($new_path, $row, $uid, $gid);
        }
    }
    elsif ($row->{type} == 5) {
        if (restore_zero($row, $new_path)) {
            fix_modes($new_path, $row, $uid, $gid);
        }
    }
    else {
        print "Invalid type '$row->{type}' for '$row->{path}' in backup "
            . "'$row->{backup}'\n";
        return;
    }
}

###############################################################################

my $usage_b = 0;
my $backup_id;

# Option names are matched without regard to case by default, which makes
# -d/-D and -p/-P collide; they must be told apart.
Getopt::Long::Configure("no_ignore_case");

GetOptions(
    "all|a"         => \$all_versions_b,
    "backup-id|b=s" => \$backup_id,
    "content|c=s"   => \$content_dir,
    "database|d=s"  => \$database_name,
    "debug|D"       => \$debug_b,
    "port|p=s"      => \$database_port,
    "password|P=s"  => \$database_password,
    "username|u=s"  => \$database_username,
    "help|usage"    => \$usage_b,
) or die "Invalid command-line options, run with --help for usage\n";

usage() if $usage_b;

die "Must specify a content directory (-c)\n" unless $content_dir;
die "Content directory '$content_dir' is not a directory\n"
    unless -d $content_dir;

# connect to the database; errors are checked by hand below
$db_conn = DBI->connect("dbi:Pg:dbname=$database_name;port=$database_port",
                        $database_username, $database_password,
                        { RaiseError => 0, PrintError => 0 });
die "Could not connect to database $database_name at port $database_port"
    . (defined $DBI::errstr ? ": $DBI::errstr" : "") . "\n"
    unless $db_conn;

die "No patterns specified on command-line\n" unless @ARGV;

# NOTE(review): with a zero umask the directories made on the way to a
# file are created world-writable -- confirm that this is intended.
umask(0);

# add another where clause if we are looking at a specific id
my $backup_where = "";
my @backup_bind;
if (defined $backup_id && length $backup_id) {
    die "Backup id '$backup_id' is not a number\n"
        unless $backup_id =~ /^\d+$/;
    $backup_where = 'AND "backup" = ?';
    @backup_bind = ($backup_id);
}

# caches of owner/group name to numeric id; a failed lookup is cached as
# undef so that it is not repeated for every row
my %uids;
my %gids;

for my $pattern (@ARGV) {
    print "Restoring files that match $pattern";
    print " in backup $backup_id" if @backup_bind;
    print ":\n";

    # page through the matches so that we never hold them all in memory
    my $offset_c = 0;
    my $rows_each = 10000;
    while (1) {
        # the pattern and backup id come from the command line so they
        # are bound as values, never pasted into the SQL text
        my $query = qq{SELECT *,EXTRACT(EPOCH FROM mtime) as $epoch_field }
            . qq{FROM files WHERE "path" LIKE ? $backup_where }
            . qq{ORDER BY "path", "mtime" DESC }
            . qq{LIMIT $rows_each OFFSET $offset_c};

        my $stmt = $db_conn->prepare($query);
        if (not $stmt) {
            my $errstr = $db_conn->errstr;
            die "Preparing query '$query' failed: $errstr\n";
        }
        if (not $stmt->execute($pattern, @backup_bind)) {
            my $errstr = $stmt->errstr;
            die "Executing query '$query' failed: $errstr\n";
        }

        my $row_c = 0;
        while (my $row = $stmt->fetchrow_hashref) {
            my $uid;
            if ($row->{owner}) {
                $uids{$row->{owner}} = (getpwnam($row->{owner}))[2]
                    unless exists $uids{$row->{owner}};
                $uid = $uids{$row->{owner}};
            }

            my $gid;
            if ($row->{group}) {
                $gids{$row->{group}} = (getgrnam($row->{group}))[2]
                    unless exists $gids{$row->{group}};
                $gid = $gids{$row->{group}};
            }

            $row_c++;
            restore_entry($row, $uid, $gid);
        }

        # a short page means that we have seen the last of the matches
        last if $row_c < $rows_each;
        $offset_c += $row_c;
    }
}