#!/usr/bin/perl
#
# Perl script to scan raw disk blocks looking for certain patterns.  This allows you to "undelete"
# a file that you accidentally wrote over, provided old temporary copies of it are still on the disk.
#
# Usage:
#
#     perl recover.pl regex-pattern1 [regex-pattern2 ...] disk-device | gzip -9 > hopeful.gz
#
# A block must match all of the patterns to be "found".
#
# WARNING: You must pipe the output from this command to netcat to get it off of the box or, if
# that is not possible, make sure to gzip the output so the script doesn't find its own output
# file on disk and go recursive.  For example:
#
#     # piping to netcat to send it to another server
#     perl recover.pl sendmail_pid /dev/md0 | nc server1 5000
#
#     # or piping to gzip to avoid the recursion
#     perl recover.pl sendmail_pid /dev/sda1 | gzip -9 > hopeful.gz
#
# $Id: recover.pl,v 1.3 2010/05/05 20:29:00 gray Exp $
#

use strict;

# how big is a block
my $BLOCK_SIZE = $ENV{RECOVER_BLOCK_SIZE} || 10240;

# how many bytes to keep from the last block so we won't miss patterns that are on block boundaries
my $OVERLAP = $ENV{RECOVER_OVERLAP} || 128;

# once we find a match how many blocks before the match to dump 
my $BEFORE_NUM = $ENV{RECOVER_BEFORE_NUM} || 2;

# once we find a match how many blocks after the match to dump 
my $AFTER_NUM = $ENV{RECOVER_AFTER_NUM} || 2;

####################################################

die "Usage: recover regex1 [regex2 ...] disk-device\n" if scalar(@ARGV) < 2;

my @pats;
while (@ARGV > 1) {
    my $pat = shift;
    # compile regex patterns 
    push(@pats, qr{$pat});
}

# now open our disk file
open(DISK, '<', $ARGV[0]) || die "Cannot open $ARGV[0]: $!\n";

print STDERR "**** WARNING: output from this script should pipe through gzip or go to another volume ****\n";

my $block;
my $blockC = 0;
my @prev_blocks;
my $nextC = 0;
READ: while (1) {
    # save the last bit of the block so the patterns with match data at the very end of a block
    my $overlap = substr($block, (- $OVERLAP));
    last unless sysread(DISK, $block, $BLOCK_SIZE) > 0;
    $blockC++;
    # make sure this block has all of the patterns
    my $matched = 1;
    # prepend the overlap
    my $block_with_overlap = $overlap . $block;
    for my $pat (@pats) {
	if ($block_with_overlap !~ $pat) {
	    $matched = 0;
	    last;
	}
    }
    # are we dumping blocks after a previous match?
    if ($nextC > 0) {
	$nextC--;
	# if this block too matched then we need to read more next blocks
	$nextC = $AFTER_NUM if $matched;
	print $block;
	# we don't look for the pattern in this block since we are already dumping it
	next;
    }
    if ($matched) {
	print STDERR "Found match at block $blockC\n";
	# found all patterns
	print "\n\n--- MATCH @ block $blockC ---\n\n";
	for my $prev (@prev_blocks) {
	    print $prev;
	}
	if (scalar(@prev_blocks) == 0) {
	    print $block_with_overlap;
	} else {
	    print $block;
	}
	# clear the prev-blocks
	@prev_blocks = ();
	# we want the next ones as well
	$nextC = $AFTER_NUM;
    } else {
	# remove the first block from list
	shift(@prev_blocks) if scalar(@prev_blocks) >= $BEFORE_NUM;
	push(@prev_blocks, $block);
    }
}

