#!/usr/bin/perl
#
# Content Based Restore Script
# http://256.com/gray/docs/content_based_backup/
#
# Copyright 2006 by Gray Watson
#
# Permission to use, copy, modify, and distribute this software for
# any purpose and without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies, and that the name of Gray Watson not be used in advertising
# or publicity pertaining to distribution of the document or software
# without specific, written prior permission.
#
# Gray Watson makes no representations about the suitability of the
# software described herein for any purpose.  It is provided "as is"
# without express or implied warranty.
#
# The author may be contacted via http://256.com/gray/
#
# $Id: restore.pl,v 1.18 2010-06-13 03:04:32 gray Exp $
#

# Defaults for the database connection.  They are declared up here, above
# usage(), because the usage text interpolates them.

# name of the database that holds our backup information, overridden with -d
my $database_name = "backup";

# host that the database is running on, overridden with -h
my $database_host = "localhost";

# port that the database is running on, overridden with -p
my $database_port = 5433;

# database type part of the dbi URI, overridden with -t
my $database_type = "Pg";

# username to use to connect to the database, overridden with -u
my $database_username = "backup";

# password to use to connect to the database, overridden with -P
my $database_password = "";

###############################################################################
#
# USAGE MESSAGE:
# Print the usage message to STDERR and exit with status 1.
#
#   $arg - optional description of what was wrong with the arguments;
#          when it is missing or empty only the usage text is printed
#
sub usage {
  my($arg) = @_;

  # only complain about an invalid argument when we were handed one so
  # that asking for --help does not report a bogus error
  print STDERR "$0: invalid argument usage: $arg\n"
    if defined($arg) && length($arg);

  print STDERR

qq[Usage: $0 [-a] [-b id] [-c dir] [-d database] [-D] [-h host] [-m machine]
       [-p db-port] [-P password] [-t db-type] [-u username]
                  file-pattern1 ...
    -a             Restore older versions of all files as file.YYYYMMDD.BBB
    -b backup-id   Number of backup we want to recover from.  Latest if none.
    -c dir         Master content directory.
    -d database    Database to connect to.  Default: $database_name
    -D             Turn on debug output.
    -h host        Host that is serving the database.  Default: $database_host
    -m machine     Machine we are restoring for.
    -p port        Port to connect to the database.  Default: $database_port
    -P password    Password to use to connect to the database.
    -t db-type     Type portion of the database URI.  Default: $database_type
    -u username    Username to use to connect to the database.
    file-pattern1  File pattern to lookup and recover.
];
  exit 1;
}

###############################################################################
#
# BACKGROUND:
#
# This script restores files that were previously backed up by the
# content-based backup script.  It takes a series of SQL like patterns
# and will restore files into the current directory.
#
###############################################################################
#
# NORMAL USAGE:
#
#    restore.pl -m server1 -c /backup/host/CONTENT %var/log/syslog%
#
# This will check the files table in the backup database, find all
# files which match the SQL LIKE pattern '%var/log/syslog%', and
# restore them to the current directory.  If it matches the following
# files and directories:
#
#	/usr/var/log/syslog.txt
#	/usr/var/log/syslog.txt~
#	/usr/local/var/log/syslog/auth.txt
#
# Then it will create the usr/var/log and usr/local/var/log/syslog
# directories in the current directory, and will restore the files
# into the directories.  It will try to recover the owner, group, and
# proper modes of the files if the caller has the permissions to do
# so.  You will probably want to run restore as root.
#
# If you want to recover from a specific backup instead of the latest
# one, then use the (-b #) argument where you specify the number of
# the backup from the database.
#
# You can also specify the -a (all versions) flag which will restore
# the latest version of the file as 'file' but older versions as
# 'file.YYYYMMDD.BBB' with the date and the backup number appended to
# the filename.  This will allow you to see how the file has changed
# over time and choose the proper version to restore.
#
###############################################################################

use strict;

use Digest::SHA;
use Fcntl ':mode';
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use Getopt::Long;
use IO::Handle;
use Unix::Mknod qw{makedev mknod major minor};
use DBI;

#
# some constants that can be configured with runtime args
#

# short hostname of the system running the restore; it is used as the
# machine name when -m is not given
chomp(my $hostname = `hostname -s`);

# directory of content files, overridden with -c
my $content_dir = "/usr2/backup/CONTENT";

# name of the machine whose backups we are restoring, set with -m
my $machine_name;

# some global variables

# database connection handle
my $db_conn;
# true if older versions of files should be restored as well (-a)
my $all_versions_b = 0;
# true if debug output is turned on (-D)
my $debug_b = 0;
# name of the query column that holds the file mtime as an epoch value
my $epoch_field = "epoch";

# global which holds the paths that we have restored and their signatures
my %restore_path_signatures;

###############################################################################

#
# Create every directory component of a path, similar to "mkdir -p".
#
#   $path   - slash separated path whose directories should exist
#   $file_b - true if the last component of $path is a file name which
#             must not be created as a directory
#
# Dies if a component cannot be created for any reason other than it
# already existing.
#
sub make_path
{
  my ($path, $file_b) = @_;
  my @dirs = split(/\//, $path);
  
  # remove the last entry which should be the file
  pop(@dirs) if $file_b;
  
  # an absolute path splits into a leading empty component so start
  # from the root in that case instead of trying to mkdir('')
  my $new_dir = ($path =~ m,^/,) ? '/' : '';
  for my $dir (@dirs) {
    # skip the empty components produced by leading or doubled slashes
    next unless length($dir);
    
    # test the length and not the truth of the string so that a
    # directory named "0" still gets a separator after it
    $new_dir .= '/' if length($new_dir) && $new_dir !~ m,/$,;
    $new_dir .= $dir;
    die "Could not create $new_dir: $!" unless (mkdir($new_dir) || $!{EEXIST});
  }
}

#
# Try to get the owner, group, permissions, and modification time of
# a restored path to match the backup.  This is best effort: failures
# of the individual calls are ignored.
#
#   $path - path that was just restored
#   $row  - hash reference for the row from the files table; the mode
#           and epoch fields are used
#   $uid  - numeric user id to give the path to, or undef to leave it
#   $gid  - numeric group id to give the path to, or undef to leave it
#
sub fix_modes
{
  my ($path, $row, $uid, $gid) = @_;
  
  # Change the ownership before the permissions because on many
  # systems (Linux for one) a chown clears the setuid and setgid bits.
  # An id that we could not look up is passed as -1, which most
  # systems take to mean "leave unchanged"; an undef would be treated
  # as 0 and ask for root.
  if (defined($uid) || defined($gid)) {
    chown(defined($uid) ? $uid : -1, defined($gid) ? $gid : -1, $path);
  }
  
  # only the permission bits of the mode are applied
  my $perms = ($row->{mode} || 0) & 07777;
  chmod($perms, $path) if $perms;
  
  # access time becomes now, modification time comes from the backup
  utime(time, $row->{$epoch_field}, $path) if $row->{$epoch_field};
}

#
# Restore the content of a regular file from the content directory.
#
#   $row      - hash reference for the row from the files table; the
#               path, content, backup, and epoch fields are used
#   $new_path - relative path to restore the file into
#
# Returns the path that was written, which has a date and backup
# number appended when an older version is restored with -a, or undef
# if the file was skipped or could not be opened.  Dies if reading the
# content or writing the destination fails part way through.
#
sub restore_file
{
  my ($row, $new_path) = @_;
  
  # the content signature is split into the AA/BB/REST layout that is
  # used under the content directory
  if ($row->{content} !~ m,^(..)(..)(...*)$,) {
    print "Unknown content form for '$row->{path}' in backup "
      . "'$row->{backup}'\n";
    return undef;
  }
  my $src_path = "$content_dir/$1/$2/$3";
  
  # something is already at the destination
  if (-e $new_path) {
    # if we are dumping all the versions of the files and we have not
    # dumped this version before then restore it under a longer name
    if ($all_versions_b &&
        (! $restore_path_signatures{$row->{path} . $row->{content}})) {
      # tack onto the name the YYYYMMDD if available
      if ($row->{$epoch_field}) {
        my ($day, $mon, $year) = (localtime($row->{$epoch_field}))[3, 4, 5];
        $new_path .= sprintf(".%04d%02d%02d", $year + 1900, $mon + 1, $day);
      }
      # also tack on the backup number which should be unique
      $new_path .= ".$row->{backup}";
    }
    else {
      print "  File '$row->{path}' skipped from backup $row->{backup}\n"
        if $debug_b;
      return undef;
    }
  }
  
  # the content is either stored as-is or gzipped with a .gz extension
  my $SRC;
  my $gzipped_b = 0;
  if (-f $src_path) {
    if (not open($SRC, "<", $src_path)) {
      warn "Could not open content '$src_path': $!";
      return undef;
    }
  }
  else {
    $src_path .= '.gz';
    if (! -f $src_path) {
      print "Could not access '$src_path': $!\n";
      return undef;
    }
    
    $SRC = IO::Uncompress::Gunzip->new($src_path);
    if (not $SRC) {
      # the gunzip module reports its errors in $GunzipError, not $!
      warn "Could not open gzipped content '$src_path': $GunzipError";
      return undef;
    }
    $gzipped_b = 1;
  }
  
  warn "Overwriting previous version of '$new_path'\n" if -e $new_path;
  
  # if the first open fails then build the directories and try again
  my $DEST;
  if (not open($DEST, '>', $new_path)) {
    make_path($new_path, my $IS_FILE_B = 1);
    if (not open($DEST, '>', $new_path)) {
      warn "Could not open new path '$new_path': $!";
      return undef;
    }
  }
  
  # copy the content across while computing its signature
  my $sig = Digest::SHA->new(256);
  while (1) {
    # we cannot use sysread here because of gzip
    my $size = read($SRC, my $buf, 10240);
    # a plain file handle reports an error as undef while the gunzip
    # handle reports it as a negative count
    if (! defined($size) || $size < 0) {
      my $err = $gzipped_b ? $GunzipError : $!;
      die "Read from content file failed: $err";
    }
    last unless $size;
    $sig->add($buf);
    
    # syswrite may write less than we asked for so loop until done
    my $buf_len = length($buf);
    my $written_c = 0;
    while ($written_c < $buf_len) {
      my $wrote = syswrite($DEST, $buf, $buf_len - $written_c, $written_c);
      die "Write to '$new_path' failed: $!" unless defined($wrote);
      $written_c += $wrote;
    }
  }
  my $sig_check = $sig->hexdigest();
  print "File $row->{path} signature does not match db\n"
    unless $sig_check eq $row->{content};
  
  print "Restored $new_path with sig $row->{content}\n";
  
  # note that we have restored this path/signature combo
  $restore_path_signatures{$row->{path} . $row->{content}}++;
  
  close($SRC);
  warn "Could not close '$new_path': $!" unless close($DEST);
  return $new_path;
}

#
# Restore a directory entry.
#
#   $row      - hash reference for the row from the files table; only
#               the mode field is used
#   $new_path - relative path of the directory to create
#
# Returns 1 if the directory was restored or 0 if something already
# exists at the path.
#
sub restore_directory
{
  my ($row, $new_path) = @_;
  
  # skip anything that already exists at the destination
  if (-e $new_path) {
    return 0;
  }
  
  # every component is a directory so nothing is trimmed off the end
  my $last_is_file_b = 0;
  make_path($new_path, $last_is_file_b);
  
  my $mode = $row->{mode};
  if ($mode) {
    chmod($mode, $new_path);
  }
  return 1;
}

#
# Restore a symbolic link entry.
#
#   $row      - hash reference for the row from the files table; the
#               path, backup, size, and linkpath fields are used
#   $new_path - relative path of the symlink to create
#
# Returns 1 if the link was created or 0 if the row is not usable or
# something already exists at the path.  Dies if the symlink call
# fails.
#
sub restore_symlink
{
  my ($row, $new_path) = @_;
  my $target = $row->{linkpath};
  
  # we cannot make a link without knowing where it points
  unless ($target) {
    warn "'$row->{path}' in backup '$row->{backup}' is symlink "
      . "but no linkpath\n";
    return 0;
  }
  
  # the recorded size has to agree with the length of the link target
  unless ($row->{size} == length($target)) {
    warn "'$row->{path}' in backup '$row->{backup}' linkpath "
      . "does not match size $row->{size}\n";
    return 0;
  }
  
  # skip dups; -e follows links so a dangling one needs the -l test
  if (-e $new_path or -l $new_path) {
    return 0;
  }
  
  # the last component is the link itself and not a directory
  my $last_is_file_b = 1;
  make_path($new_path, $last_is_file_b);
  
  symlink($target, $new_path)
    or die "Could not create symlink from $row->{linkpath} to $new_path: $!\n";
  return 1;
}

#
# Restore a block or character device node.
#
#   $row      - hash reference for the row from the files table; the
#               path, backup, mode, major, and minor fields are used
#   $new_path - relative path of the device node to create
#
# Returns 1 if the node was created or 0 if the row is not usable or
# something already exists at the path.  Dies if the mknod call fails.
#
sub restore_device
{
  my ($row, $new_path) = @_;
  
  # skip dups; -e follows links so a dangling symlink needs the -l test
  return 0 if (-e $new_path || -l $new_path);
  
  if (not (defined $row->{major} && defined $row->{minor})) {
    warn "'$row->{path}' in backup '$row->{backup}' is device "
      . "but no major or minor  field\n";
    return 0;
  }
  
  # mknod needs the file type bits to know what sort of node to make;
  # without them the system call creates a regular file.
  # NOTE(review): this assumes that the mode field holds the full
  # st_mode including the type bits -- confirm against the backup script
  my $mode = $row->{mode} || 0;
  my $type_bits = 0;
  if (S_ISBLK($mode)) {
    $type_bits = S_IFBLK;
  }
  elsif (S_ISCHR($mode)) {
    $type_bits = S_IFCHR;
  }
  else {
    # keep the previous behavior of passing no type at all
    warn "'$row->{path}' in backup '$row->{backup}' is device "
      . "but mode does not say block or character\n";
  }
  
  # make a st_rdev number
  my $rdev = makedev($row->{major}, $row->{minor});
  
  make_path($new_path, my $FILE_B = 1);
  
  # created owner read-only; the caller fixes up the real permissions
  die "Could not restore device node '$new_path': $!"
    unless mknod($new_path, $type_bits | 0400, $rdev);
  return 1;
}

#
# Restore a zero-length file, which has no content to copy.
#
#   $row      - hash reference for the row from the files table; the
#               path, backup, and size fields are used
#   $new_path - relative path of the file to create
#
# Returns 1 if the file was created or 0 if something already exists
# at the path.  Dies if the file cannot be created.
#
sub restore_zero
{
  my ($row, $new_path) = @_;
  
  # complain about a zero-length entry with a size but restore it anyway
  if ($row->{size} != 0) {
    print "'$row->{path}' in backup '$row->{backup}' is zero-file "
      . "but size '$row->{size}'\n";
  }
  
  # skip dups
  return 0 if -e $new_path;
  
  # the directories are always built first so there is no point in
  # retrying the open after a failure
  make_path($new_path, my $FILE_B = 1);
  
  my $DEST;
  die "Could not open new path '$new_path': $!"
    unless open($DEST, '>', $new_path);
  
  # creating the file is all we need to do
  warn "Could not close '$new_path': $!" unless close($DEST);
  return 1;
}

#
# Restore one row from the files table into the current directory by
# handing it to the restore routine for its type and then fixing up
# the modes of whatever was restored.
#
#   $row  - hash reference for the row from the files table
#   $uid  - numeric user id for the entry or undef
#   $gid  - numeric group id for the entry or undef
#
sub restore_entry
{
  my ($row, $uid, $gid) = @_;
  
  if ($debug_b) {
    print "  Restoring '$row->{path}' type '$row->{type}' from "
      . "backup $row->{backup}\n";
  }
  
  # restore relative to the current directory by dropping leading /s
  (my $new_path = $row->{path}) =~ s,^/+,,;
  
  my $type = $row->{type};
  my $restored_b;
  if ($type == 1) {
    # regular file: the path may come back renamed, or undef if skipped
    $new_path = restore_file($row, $new_path);
    $restored_b = $new_path;
  }
  elsif ($type == 2) {
    # directory
    $restored_b = restore_directory($row, $new_path);
  }
  elsif ($type == 3) {
    # symlink: no modes are applied to the link
    restore_symlink($row, $new_path);
    $restored_b = 0;
  }
  elsif ($type == 4) {
    # device node
    $restored_b = restore_device($row, $new_path);
  }
  elsif ($type == 5) {
    # zero-length file
    $restored_b = restore_zero($row, $new_path);
  }
  else {
    print "Invalid type '$row->{type}' for '$row->{path}' in backup "
      . "'$row->{backup}'\n";
    return;
  }
  
  fix_modes($new_path, $row, $uid, $gid) if $restored_b;
  return;
}

###############################################################################

#
# Main program: parse the arguments, connect to the database, and
# restore every entry that matches each pattern on the command line.
#

my $usage_b = 0;
my $backup_id;

GetOptions("all|a" => \$all_versions_b,
           "backup-id|b=s" => \$backup_id,
           "content|c=s" => \$content_dir,
           "database|d=s" => \$database_name,
           "debug|D" => \$debug_b,
           "host|h=s" => \$database_host,
           "machine|m=s" => \$machine_name,
           "port|p=s" => \$database_port,
           "password|P=s" => \$database_password,
           "type|t=s" => \$database_type,
           "username|u=s" => \$database_username,
           "help|usage" => \$usage_b,
           ) || usage("bad command-line options");
usage() if $usage_b;

# check the arguments before we bother the database
die "Must specify a content directory (-c)\n" unless $content_dir;
die "Content directory '$content_dir' is not a directory\n"
  unless -d $content_dir;
die "No patterns specified on command-line\n" unless @ARGV;
$machine_name = $hostname unless $machine_name;

# connect to the DB; errors are checked by hand so turn off the
# automatic reporting
$db_conn = DBI->connect("dbi:$database_type:dbname=$database_name;host=$database_host"
                        . ";port=$database_port",
                        $database_username, $database_password,
                        { RaiseError => 0, PrintError => 0 });
if (not $db_conn) {
    my $errstr = $DBI::errstr;
    die "Could not connect to $database_host:$database_port database $database_name: $errstr";
}

# NOTE(review): with the umask cleared, files and directories are
# created wide open until fix_modes runs on them -- presumably so the
# restored modes are not restricted; confirm this is intended
umask(0);

# Limit the rows to a specific backup id or to the backups of our
# machine.  The values are bound as statement parameters and never
# pasted into the SQL so quotes in them cannot break or alter the query.
my $backup_where;
my @backup_binds;
if ($backup_id) {
    $backup_where = 'AND "backup" = ?';
    @backup_binds = ($backup_id);
} else {
    $backup_where = 'AND "backup" in (select id from backups where machine = ?)';
    @backup_binds = ($machine_name);
}

# caches of the owner and group names that we have already looked up;
# names that do not exist on this system are cached as undef
my %uids;
my %gids;
for my $pattern (@ARGV) {
  print "Restoring files that match $pattern";
  print " in backup $backup_id" if $backup_id;
  print ":\n";
  
  # walk the matching rows a page at a time
  # NOTE(review): the ordering is not unique if a path has two rows
  # with the same mtime, so paging by offset could in theory repeat or
  # miss such a row
  my $offset_c = 0;
  my $rows_each = 10000;
  while (1) {
    # newest version of each path first; the limit and offset are
    # numbers that we generate so they are safe to interpolate
    my $query = qq{SELECT *,EXTRACT(EPOCH FROM mtime) as $epoch_field
                     FROM files
                       WHERE "path" LIKE ? $backup_where
                         ORDER BY "path", "mtime" DESC
                           LIMIT $rows_each
                             OFFSET $offset_c};
    my $stmt = $db_conn->prepare($query);
    if (not $stmt) {
      my $errstr = $db_conn->errstr;
      die "Preparing query '$query' failed: $errstr\n";
    }
    if (not $stmt->execute($pattern, @backup_binds)) {
      my $errstr = $stmt->errstr;
      die "Executing query '$query' failed: $errstr\n";
    }

    my $row_c = 0;
    while (my $row = $stmt->fetchrow_hashref) {
      # map the owner and group names to the ids on this system
      my $uid;
      if ($row->{owner}) {
        if (not exists $uids{$row->{owner}}) {
          $uids{$row->{owner}} = (getpwnam($row->{owner}))[2];
        }
        $uid = $uids{$row->{owner}};
      }
      my $gid;
      if ($row->{group}) {
        if (not exists $gids{$row->{group}}) {
          $gids{$row->{group}} = (getgrnam($row->{group}))[2];
        }
        $gid = $gids{$row->{group}};
      }
      $row_c++;
      restore_entry($row, $uid, $gid);
    }
    
    # a short page means that we have seen all of the rows
    last if $row_c < $rows_each;
    
    $offset_c += $row_c;
  }
}

$db_conn->disconnect;
