#!/usr/bin/perl
#
# Script to check and verify the database rows.
# http://256.com/gray/docs/content_based_backup/
#
# Copyright 2006 by Gray Watson
#
# Permission to use, copy, modify, and distribute this software for
# any purpose and without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies, and that the name of Gray Watson not be used in advertising
# or publicity pertaining to distribution of the document or software
# without specific, written prior permission.
#
# Gray Watson makes no representations about the suitability of the
# software described herein for any purpose.  It is provided "as is"
# without express or implied warranty.
#
# The author may be contacted via http://256.com/gray/
#
# $Id: check_db.pl,v 1.12 2010-06-13 03:04:32 gray Exp $
#

# name of the database that holds our backup information, overridden with -d
# (-b is the backup-id option, not the database name)
my $database_name = "backup";

# host that the database is running on, overridden with -h
my $database_host = "localhost";

# port that the database is running on, overridden with -p
# NOTE(review): a stock PostgreSQL install listens on 5432 -- confirm that
# 5433 is the intended default for this site
my $database_port = 5433;

# database type (driver name) part of the dbi URI, overridden with -t
my $database_type = "Pg";

# username to use to connect to the database, overridden with -u
my $database_username = "backup";

# password to use to connect to the database, overridden with -P
my $database_password = "";

###############################################################################
#
# USAGE MESSAGE:
#
# Print the usage message to STDERR and exit with status 1.
#
# $arg - optional description of the offending argument.  When it is
#        missing or empty (for instance when the user asked for --help)
#        the "invalid argument usage" line is left out rather than being
#        printed with nothing after the colon.
#
# This function does not return.
#
sub usage {
  my ($arg) = @_;

  print STDERR "$0: invalid argument usage: $arg\n"
    if defined $arg && length $arg;

  print STDERR
qq[Usage: $0 [-b backup-id] [-c dir] [-d database] [-D] [-f file-like] [-h host]
       [-p db-port] [-P password] [-t db-type] [-u username] [--help]

    -b backup-id   Optional backup-id to check.  Default is none.
    -c dir         Content directory.  No default.
    -d database    Database to connect to.  Default: $database_name
    -D             Turn on debug output.
    -f file-like   Check db entries for file paths like this.  Default none.
    -h host        Host that is serving the database.  Default: $database_host
    -p port        Port to connect to the database.  Default: $database_port
    -P password    Password to use to connect to the database.
    -t db-type     Type portion of the database URI.  Default: $database_type
    -u username    Username to use to connect to the database.
    --help         Print this message.
];
  exit 1;
}
#
###############################################################################
#
# BACKGROUND:
#
# This script walks the files table in the backup database and
# verifies that each row is well formed and that each regular-file row
# has a corresponding content file on disk.  It cannot fix any problems
# it finds; it only reports them.
#
###############################################################################
#
# NORMAL USAGE:
#
#    check_db.pl -c /backup/host/CONTENT
#
# This will check the files table in the backup database against the
# content files in the /backup/host/CONTENT directory.
#
###############################################################################

use strict;

use Fcntl ':mode';
use File::Copy;
use File::Find;
use File::stat;
use Getopt::Long;
use IO::Handle;
use DBI;

#
# some constants that can be configured with runtime args
#

# content directory that file rows are checked against, set with -c;
# no default specified so you are forced to enter one
my $content_dir;

# true when debug output was requested with -D
my $debug_b = 0;
# number of rows handed to check_entry()
my $check_entry_c = 0;
# rows whose path field is missing
my $no_path_c = 0;
# rows whose owner field is missing
my $no_owner_c = 0;
# rows whose group field is missing
my $no_group_c = 0;
# rows whose mode field is undefined
my $no_mode_c = 0;
# file rows whose content field does not have the expected form
my $invalid_content_c = 0;
# file rows whose content file was not found under $content_dir
my $missing_file_c = 0;
# symlink rows without a linkpath
my $no_link_c = 0;
# symlink rows whose size does not equal the length of the linkpath
my $bad_link_size_c = 0;
# device rows with an undefined major field
my $no_dev_major_c = 0;
# device rows with an undefined minor field
my $no_dev_minor_c = 0;
# zero-length-file rows whose size is not 0
my $zero_bad_size_c = 0;
# rows whose type is not one of the known values 1 through 5
my $invalid_type_c = 0;

#
# Return 1 if a database field holds no value, that is if it is
# undefined or the empty string, and 0 otherwise.  A plain truth test is
# not good enough here because the string "0" is false in Perl but is a
# value that really was stored in the row.
#
sub _is_blank
{
  my ($value) = @_;
  return (!defined($value) || length($value) == 0) ? 1 : 0;
}

#
# process a file by checking it with the database
#
# $row - hash reference for one row of the files table.  The fields read
#        are path, backup, owner, group, mode, type, content, linkpath,
#        size, major and minor.
#
# Every problem found is printed to STDOUT as an "ERROR: ..." line and
# the matching file-level counter is incremented.  Nothing is repaired
# and there is no meaningful return value.
#
sub check_entry
{
  my ($row) = @_;

  print "Checking '$row->{path}' in backup $row->{backup}\n" if $debug_b;
  $check_entry_c++;

  # checks that apply to every type of entry
  if (_is_blank($row->{path})) {
    print "ERROR: '$row->{path}' in backup '$row->{backup}' has no path\n";
    $no_path_c++;
  }
  if (_is_blank($row->{owner})) {
    print "ERROR: '$row->{path}' in backup '$row->{backup}' has no owner\n";
    $no_owner_c++;
  }
  if (_is_blank($row->{group})) {
    print "ERROR: '$row->{path}' in backup '$row->{backup}' has no group\n";
    $no_group_c++;
  }
  # I guess in some weird situations, the mode could be 0
  if (not defined $row->{mode}) {
    print "ERROR: '$row->{path}' in backup '$row->{backup}' has no mode\n";
    $no_mode_c++;
  }

  # checks that depend on the type of the entry
  my $type = $row->{type};
  if ($type == 1) {
    # file: the content string is split into two 2-character directory
    # levels followed by a remainder of at least 2 characters
    my ($dir1, $dir2, $rest) = ($row->{content} =~ m,^(..)(..)(...*)$,);
    if (not defined $dir1) {
      print "ERROR: Unknown content form for '$row->{path}' in backup "
        . "'$row->{backup}'\n";
      $invalid_content_c++;
      return;
    }

    # the content file may be stored either compressed or as is
    my $file_path = "$content_dir/$dir1/$dir2/$rest";
    if (! (-f "$file_path.gz" || -f $file_path)) {
      print "ERROR: Missing file '$file_path'\n";
      $missing_file_c++;
      return;
    }

    # We can not check the file size or other stuff here because the
    # file is compressed.  We will leave it to check_content to check
    # the file details.
  }
  elsif ($type == 2) {
    # directory: nothing more to check
  }
  elsif ($type == 3) {
    # symlink
    my $linkpath = $row->{linkpath};
    if (_is_blank($linkpath)) {
      print "ERROR: '$row->{path}' in backup '$row->{backup}' is symlink " .
        "but no linkpath\n";
      $no_link_c++;
    }
    # a missing linkpath counts as length 0, so it is also reported here
    # unless the recorded size is 0 as well
    # NOTE(review): length() counts characters; if the driver hands back
    # decoded non-ASCII text this may differ from a size in bytes --
    # confirm
    my $link_len = defined $linkpath ? length($linkpath) : 0;
    if ($row->{size} != $link_len) {
      print "ERROR: '$row->{path}' in backup '$row->{backup}' linkpath " .
        "does not match size $row->{size}\n";
      $bad_link_size_c++;
    }
  }
  elsif ($type == 4) {
    # device: 0 is a usable number so only an undefined field is an error
    if (not defined $row->{major}) {
      print "ERROR: '$row->{path}' in backup '$row->{backup}' is device "
        . "but no major field\n";
      $no_dev_major_c++;
    }
    if (not defined $row->{minor}) {
      print "ERROR: '$row->{path}' in backup '$row->{backup}' is device "
        . "but no minor field\n";
      $no_dev_minor_c++;
    }
  }
  elsif ($type == 5) {
    # zero-length file
    if ($row->{size} != 0) {
      print "ERROR: '$row->{path}' in backup '$row->{backup}' is zero-file "
        . "but size '$row->{size}'\n";
      $zero_bad_size_c++;
    }
  }
  else {
    print "ERROR: Invalid type '$row->{type}' for '$row->{path}' in backup "
      . "'$row->{backup}'\n";
    $invalid_type_c++;
  }
  return;
}

###############################################################################

#
# Main program: parse the command line, connect to the database, run
# check_entry() over the selected rows of the files table one page at a
# time, and print a summary of the counters.
#

my $usage_b = 0;
my $backup_id;
my $file_like;

GetOptions("backup-id|b=s" => \$backup_id,
           "content|c=s" => \$content_dir,
           "database|d=s" => \$database_name,
           "debug|D" => \$debug_b,
           "file-like|f=s" => \$file_like,
           "host|h=s" => \$database_host,
           "port|p=s" => \$database_port,
           "password|P=s" => \$database_password,
           "type|t=s" => \$database_type,
           "username|u=s" => \$database_username,
           "help|usage" => \$usage_b,
           ) || usage("unrecognized or malformed option");
usage() if $usage_b;
die "Must specify a content directory (-c)\n"
  unless defined $content_dir && length $content_dir;
die "Content directory '$content_dir' is not a directory\n"
  unless -d $content_dir;

# connect to the database; RaiseError and PrintError are both off so
# every DBI call below has its result checked by hand
my $db_conn = DBI->connect("dbi:$database_type:dbname=$database_name;host=$database_host"
                           . ";port=$database_port",
                           $database_username, $database_password,
                           { RaiseError => 0, PrintError => 0 });
if (not $db_conn) {
    my $errstr = $DBI::errstr;
    die "Could not connect to $database_host:$database_port database $database_name: $errstr";
}

print "Checking database rows with content directory $content_dir\n";
print "Started at " . scalar(localtime) . "\n";

my $offset_c = 0;
my $rows_each = 10000;

# The values given on the command line are passed as bind values and
# never pasted into the SQL text, so a quote character in -f can not
# break or alter the query.  The tests are for defined and non-empty
# rather than true so that "-b 0" is honored instead of being ignored.
my @wheres;
my @binds;
if (defined $backup_id && length $backup_id) {
  push @wheres, "backup = ?";
  push @binds, $backup_id;
}
if (defined $file_like && length $file_like) {
  push @wheres, "path like ?";
  push @binds, $file_like;
}
my $where_str = @wheres ? "WHERE " . join(' AND ', @wheres) : "";

while (1) {
  # LIMIT/OFFSET paging is only reliable with a stable row order; without
  # an ORDER BY the database may return rows in a different order for
  # each page, skipping some rows and repeating others.
  # NOTE(review): assumes (backup, path) identifies a row -- if the table
  # has a primary key, ordering by it would be better; confirm
  my $query = qq{SELECT * FROM files
                   $where_str
                     ORDER BY backup, path
                       LIMIT $rows_each
                         OFFSET $offset_c};
  my $stmt = $db_conn->prepare($query);
  if (not $stmt) {
    my $errstr = $db_conn->errstr;
    die "Preparing query '$query' failed: $errstr\n";
  }
  if (not $stmt->execute(@binds)) {
    my $errstr = $stmt->errstr;
    die "Executing query '$query' failed: $errstr\n";
  }

  my $row_c = 0;
  while (my $row = $stmt->fetchrow_hashref) {
    check_entry($row);
    $row_c++;
  }
  # fetchrow_hashref returns undef both at the end of the data and on an
  # error, so a failed fetch must not be mistaken for the last page
  if ($stmt->err) {
    my $errstr = $stmt->errstr;
    die "Fetching rows for query '$query' failed: $errstr\n";
  }

  # a short page means that we have reached the end of the table
  last if $row_c < $rows_each;

  $offset_c += $row_c;
}

$db_conn->disconnect;

print "Finished at " . scalar(localtime) . "\n";
print qq{Checked $check_entry_c entries
   Entry missing path: $no_path_c
  Entry missing owner: $no_owner_c
  Entry missing group: $no_group_c
   Entry missing mode: $no_mode_c
   Entry invalid type: $invalid_type_c
 File invalid content: $invalid_content_c
    File missing file: $missing_file_c
      Symlink no link: $no_link_c
     Symlink bad size: $bad_link_size_c
      Device no major: $no_dev_major_c
      Device no minor: $no_dev_minor_c
   Zero file bad size: $zero_bad_size_c
};
