#!/usr/bin/perl
#
# Script to check and verify the database rows.
# http://256.com/gray/docs/content_based_backup/
#
# Copyright 2006 by Gray Watson
#
# Permission to use, copy, modify, and distribute this software for
# any purpose and without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies, and that the name of Gray Watson not be used in advertising
# or publicity pertaining to distribution of the document or software
# without specific, written prior permission.
#
# Gray Watson makes no representations about the suitability of the
# software described herein for any purpose.  It is provided "as is"
# without express or implied warranty.
#
# The author may be contacted via http://256.com/gray/
#
# $Id: check_db.pl,v 1.10 2006/06/08 21:26:59 gray Exp $
#

###############################################################################
#
# USAGE MESSAGE:

#
# Print the command-line help to STDERR and terminate the script with
# exit status 1.
#
# Arguments:
#   $arg - optional text describing the offending argument.  When it is
#          missing or empty the "invalid argument usage" line is left
#          out, so that a plain help request does not look like an error
#          report.
#
# Does not return.
#
sub usage {
  my ($arg) = @_;

  # Only name an offending argument when the caller actually supplied one.
  if (defined $arg and length $arg) {
    print STDERR "$0: invalid argument usage: $arg\n";
  }

  print STDERR qq[Usage: $0 [-b backup-id] [-c dir] [-d database] [-D] [-f file-like]
          [-p db-port] [-P password] [-u username]

  -b backup-id   Optional backup-id to check.  Default is none.
  -c dir         Content directory.  No default.
  -d database    Database to connect to.
  -D             Turn on debug output.
  -f file-like   Check db entries for file paths like this.  Default none.
  -p port        Port to connect to the database.
  -P password    Password to use to connect to the database.
  -u username    Username to use to connect to the database.
];

  exit 1;
}

#
###############################################################################
#
# BACKGROUND:
#
# This script walks the files table in the backup database and
# verifies that each line has a corresponding file on disk.  It can
# not fix the issue but brings it to your attention.
#
###############################################################################
#
# NORMAL USAGE:
#
# check_db.pl -c /backup/host/CONTENT
#
# This will check the files table in the backup database against the
# content files in the /backup/host/CONTENT directory.
#
###############################################################################

use strict;
use warnings;

use Fcntl ':mode';
use PerlIO::gzip;
use File::Copy;
use File::Find;
use File::stat;
use Getopt::Long;
use IO::Handle;
use DBI;

#
# some constants that can be configured with runtime args
#

# no default specified so you are forced to enter one
my $content_dir;
# name of the database that holds our backup information, overridden with -d
my $database_name = "backup";
# port that the database is running on, overridden with -p
my $database_port = 5433;
# username to use to connect to the database, overridden with -u
my $database_username = "backup";
# password to use to connect to the database, overridden with -P
my $database_password = "";

# debug output flag, turned on with -D
my $debug_b = 0;

# counters reported in the summary printed at the end of the run
my $check_entry_c = 0;
my $no_path_c = 0;
my $no_owner_c = 0;
my $no_group_c = 0;
my $no_mode_c = 0;
my $invalid_content_c = 0;
my $missing_file_c = 0;
my $no_link_c = 0;
my $bad_link_size_c = 0;
my $no_dev_major_c = 0;
my $no_dev_minor_c = 0;
my $zero_bad_size_c = 0;
my $invalid_type_c = 0;

#
# Check one row of the files table for internal consistency and, for
# regular files, for the presence of its content file on disk.
#
# Arguments:
#   $row - hash reference for one row, as returned by fetchrow_hashref.
#          The columns read are path, backup, owner, group, mode, type,
#          content, linkpath, size, major and minor.
#
# Every problem found is printed to STDOUT as an "ERROR: ..." line and
# the matching file-level counter is incremented.  Reads $content_dir
# and $debug_b.  Returns nothing useful.
#
sub check_entry {
  my ($row) = @_;

  # Printable copies of the identifying columns so that a NULL column
  # produces an empty string in the messages instead of a warning.
  my $path = defined $row->{path} ? $row->{path} : '';
  my $backup = defined $row->{backup} ? $row->{backup} : '';
  my $type = defined $row->{type} ? $row->{type} : '';
  my $where = "'$path' in backup '$backup'";

  print "Checking '$path' in backup $backup\n" if $debug_b;
  $check_entry_c++;

  if (not $row->{path}) {
    print "ERROR: $where has no path\n";
    $no_path_c++;
  }
  # NOTE(review): a value of 0 counts as "missing" for owner and group;
  # confirm these columns never legitimately hold 0.
  if (not $row->{owner}) {
    print "ERROR: $where has no owner\n";
    $no_owner_c++;
  }
  if (not $row->{group}) {
    print "ERROR: $where has no group\n";
    $no_group_c++;
  }
  # I guess in some weird situations, the mode could be 0
  if (not defined $row->{mode}) {
    print "ERROR: $where has no mode\n";
    $no_mode_c++;
  }

  # Anything that is not a plain non-negative integer is handled by the
  # final "invalid type" branch; 0 is never a valid type.
  my $type_n = ($type =~ m/^\d+\z/) ? $type : 0;
  # A NULL size is compared as 0 but printed as an empty string.
  my $size_n = defined $row->{size} ? $row->{size} : 0;
  my $size_str = defined $row->{size} ? $row->{size} : '';

  if ($type_n == 1) {
    # file
    my $content = defined $row->{content} ? $row->{content} : '';
    # The content name is split into two 2-character directory levels
    # and a remainder of at least two characters.
    my @parts = ($content =~ m,^(..)(..)(...*)$,);
    if (not @parts) {
      print "ERROR: Unknown content form for '$path' in backup "
        . "'$backup'\n";
      $invalid_content_c++;
      return;
    }

    my $file_path = "$content_dir/$parts[0]/$parts[1]/$parts[2]";
    # The content may be stored compressed (.gz) or as-is.
    if (! (-f "$file_path.gz" || -f $file_path)) {
      print "ERROR: Missing file '$file_path'\n";
      $missing_file_c++;
      return;
    }

    # We can not check the file size or other stuff here because the
    # file is compressed.  We will leave it to check_content to check
    # the file details.
  }
  elsif ($type_n == 2) {
    # directory, nothing more to verify
  }
  elsif ($type_n == 3) {
    # symlink
    my $linkpath = $row->{linkpath};
    # Test for a non-empty string rather than truth so that a link whose
    # target is literally "0" is not reported as missing.
    my $has_link = (defined $linkpath and length $linkpath) ? 1 : 0;
    if (not $has_link) {
      print "ERROR: $where is symlink "
        . "but no linkpath\n";
      $no_link_c++;
    }
    my $link_len = $has_link ? length($linkpath) : 0;
    if ($size_n != $link_len) {
      print "ERROR: $where linkpath "
        . "does not match size $size_str\n";
      $bad_link_size_c++;
    }
  }
  elsif ($type_n == 4) {
    # device
    if (not defined $row->{major}) {
      print "ERROR: $where is device "
        . "but no major field\n";
      $no_dev_major_c++;
    }
    if (not defined $row->{minor}) {
      print "ERROR: $where is device "
        . "but no minor field\n";
      $no_dev_minor_c++;
    }
  }
  elsif ($type_n == 5) {
    # zero-length file
    if ($size_n != 0) {
      print "ERROR: $where is zero-file "
        . "but size '$size_str'\n";
      $zero_bad_size_c++;
    }
  }
  else {
    print "ERROR: Invalid type '$type' for '$path' in backup "
      . "'$backup'\n";
    $invalid_type_c++;
  }

  return;
}

###############################################################################

my $usage_b = 0;
my $backup_id;
my $file_like;

# Getopt::Long ignores case by default, which makes -d/-D and -p/-P
# duplicate specifications where the later one silently wins.  The
# options of this script differ only in case, so matching must be
# case-sensitive.
Getopt::Long::Configure("no_ignore_case");

GetOptions("backup-id|b=s" => \$backup_id,
           "content|c=s" => \$content_dir,
           "database|d=s" => \$database_name,
           "debug|D" => \$debug_b,
           "file-like|f=s" => \$file_like,
           "port|p=s" => \$database_port,
           "password|P=s" => \$database_password,
           "username|u=s" => \$database_username,
           "help|usage" => \$usage_b,
          ) || usage();
usage() if $usage_b;

die "Must specify a content directory (-c)\n" unless $content_dir;
die "Content directory '$content_dir' is not a directory\n"
  unless -d $content_dir;

# connect to the database; errors are checked by hand below
my $db_conn = DBI->connect("dbi:Pg:dbname=$database_name;port=$database_port",
                           $database_username, $database_password,
                           { RaiseError => 0, PrintError => 0 });
die "Could not connect to database $database_name at port $database_port\n"
  unless $db_conn;

print "Checking database rows with content directory $content_dir\n";
print "Started at " . scalar(localtime) . "\n";

my $offset_c = 0;
my $rows_each = 10000;

# Build the optional WHERE clause.  The user-supplied values are passed
# as bind parameters instead of being pasted into the SQL text.
my @wheres;
my @binds;
if (defined $backup_id and length $backup_id) {
  push @wheres, "backup = ?";
  push @binds, $backup_id;
}
if (defined $file_like and length $file_like) {
  push @wheres, "path like ?";
  push @binds, $file_like;
}
my $where_str = "";
$where_str = "WHERE " . join(' AND ', @wheres) if @wheres;

# Walk the table one page at a time.  Paging with LIMIT/OFFSET is only
# reliable when every page is cut from the same ordering, hence the
# ORDER BY.
# NOTE(review): assumes (backup, path) is close to unique per row --
# confirm against the schema.
while (1) {
  my $query = "SELECT * FROM files $where_str ORDER BY backup, path "
    . "LIMIT $rows_each OFFSET $offset_c";

  my $stmt = $db_conn->prepare($query);
  if (not $stmt) {
    my $errstr = $db_conn->errstr;
    die "Preparing query '$query' failed: $errstr\n";
  }
  if (not $stmt->execute(@binds)) {
    my $errstr = $stmt->errstr;
    die "Executing query '$query' failed: $errstr\n";
  }

  my $row_c = 0;
  while (my $row = $stmt->fetchrow_hashref) {
    check_entry($row);
    $row_c++;
  }
  # With RaiseError off a failed fetch looks exactly like end of data,
  # so the error state has to be checked explicitly.
  if ($stmt->err) {
    my $errstr = $stmt->errstr;
    die "Fetching rows for query '$query' failed: $errstr\n";
  }

  # a short page means we have reached the end of the table
  last if $row_c < $rows_each;
  $offset_c += $row_c;
}

$db_conn->disconnect;

print "Finished at " . scalar(localtime) . "\n";
print qq{Checked $check_entry_c entries
Entry missing path: $no_path_c
Entry missing owner: $no_owner_c
Entry missing group: $no_group_c
Entry missing mode: $no_mode_c
Entry invalid type: $invalid_type_c
File invalid content: $invalid_content_c
File missing file: $missing_file_c
Symlink no link: $no_link_c
Symlink bad size: $bad_link_size_c
Device no major: $no_dev_major_c
Device no minor: $no_dev_minor_c
Zero file bad size: $zero_bad_size_c
};