#!/usr/bin/perl -w

# disk power failure tester -- by Tim Ellis of Digg, Inc.
# copyright 2005-2007, released under the terms of the GNU GPL.
# latest extensive modifications: April 2007.
#
# Run the script with no arguments to get a sample usage.
#
# This script is used to stress test disks right before a power failure. It is
# based on an idea by Brad Fitzpatrick of LiveJournal. There may be an
# equivalent set of scripts released by him that do this same task. If so, you
# might check them out to see if they do anything this script doesn't.
#
# Script philosophy:
#
# 1. Open several large files as O_SYNC,
# 2. Write blocks of initialising data into those files,
# 3. Write blocks of random data into the same files,
# 4. Output the block number written to on unbuffered STDOUT,
# 5. Power shutdown on test machine,
# 6. Terminals remain open with a list of block numbers,
# 7. Those blocks are claimed written to disk,
# 8. If not, something lied.
#
# The read portion of the script will be looking at the last (basically random)
# blocks the machine claimed to write to disk, hence they should all share the
# same MD5sum (that of the data being written). If any don't, then those blocks
# weren't actually written to disk before the power was pulled.
#
# Notes: Your disk I/O subsystem should be well-utilised during the test. If
# six concurrent processes don't stress your I/O subsystem, make it twelve (or
# more).

use strict;
use Fcntl;
use Fcntl qw(:DEFAULT :flock);

# --- defaults ------------------------------------------------------------
my $prefixPath         = "/TinySpace";       # directory named in the generated sample scripts
my $defaultScriptDevSz = 900000;             # maxBlock value used in the generated sample scripts
my $device             = "synchiotest.dat";  # file written in mode=write unless of=... is given
my $blockSize          = 64 * 1024;          # bytes per block unless bs=... is given
my ($minBlock, $maxBlock) = (200, 10000);    # block range for mode=write unless mmb=... is given

# --- run-time state, mostly filled in from the command line ---------------
my ($blockNum, $inFile, $minMaxAdd, $ptSelect, $cmd, $mode);
my $noOutput = 0;                            # becomes 1 when "silent" is passed
my @extraArgs;                               # arguments that matched no known option

# these will be how many blocks are skipped before writing another. we choose
# some random values in case this script becomes popular one day amongst disk
# and controller manufacturers to reduce the possibility of tuning the hardware
# to pass this test when in fact they're still hosed - this list could be any
# list of prime numbers, large or small. so feel free to change them. (they need
# not necessarily even be primes)
#
# only really one non-random prime in this list. i wanted 65536+1 to make an
# appearance! yay 65537!
#
# since i'm limiting the device to 900,000 blocks, there isn't much
# point in any prime larger than 300,000 - so I commented out a few. when 3-4TB
# HDDs become common, up the device size to maybe 3,000,000 blocks and uncomment
# a few more primes (up to 1/3 of the default device size, counted in blocks,
# seems reasonable).
# Candidate stride values, counted in blocks. Rows that start with '#' are
# disabled entries kept for later use.
my @primeList = (
  5437,   5441,   5443,   5449,   9397,   9403,   9413,   9419,
 10259,  10267,  10271,  10273,  14447,  14449,  14461,  14479,
 17627,  17657,  17659,  17669,  19433,  19441,  19447,  19457,
 22079,  22091,  22093,  22109,  32537,  32561,  32563,  32569,
 45281,  45289,  45293,  45307,  45341,  45343,  45361,  45377,
 53051,  53069,  53077,  53087,  65497,  65519,  65521,  65537,
 65563,  65579,  65581,  65587,  79357,  79367,  79379,  79393,
 79427,  79433,  79451,  79481,  80177,  80191,  80207,  80209,
 18013,  18041,  18043,  18047,  18049,  18059,  18061,  18077,
 25469,  25471,  25523,  25537,  25541,  25561,  25577,  25579,
 38971,  38977,  38993,  39019,  39023,  39041,  39043,  39047,
 42853,  42859,  42863,  42899,  42901,  42923,  42929,  42937,
 55109,  55117,  55127,  55147,  55163,  55171,  55201,  55207,
 69593,  69623,  69653,  69661,  69677,  69691,  69697,  69709,
 84131,  84137,  84143,  84163,  84179,  84181,  84191,  84199,
 90499,  90511,  90523,  90527,  90529,  90533,  90547,  90583,
103289, 103291, 103307, 103319, 103333, 103349, 103357, 103387,
112153, 112163, 112181, 112199, 112207, 112213, 112223, 112237,
131641, 131671, 131687, 131701, 131707, 131711, 131713, 131731,
176651, 176677, 176699, 176711, 253751, 253763, 253769, 253777,
182851, 182857, 182867, 182887, 182893, 182899, 182921, 182927,
185161, 185167, 185177, 185183, 185189, 185221, 185233, 185243,
189619, 189643, 189653, 189661, 189671, 189691, 189697, 189701,
219433, 219437, 219451, 219463, 219467, 219491, 219503, 219517,
226201, 226217, 226231, 226241, 226267, 226283, 226307, 226313,
253819, 253823, 253853, 253867, 375983, 375997, 376001, 376003,
293717, 293723, 293729, 293749, 293767, 293773, 293791, 293803,
#307817, 307823, 307831, 307843, 307859, 307871, 307873, 307891,
#314953, 314957, 314983, 314989, 315011, 315013, 315037, 315047,
#331171, 331183, 331207, 331213, 331217, 331231, 331241, 331249,
#376063, 376081, 376097, 376099, 500111, 500113, 500119, 500153,
#338263, 338267, 338269, 338279, 338287, 338293, 338297, 338309,
#374111, 374117, 374123, 374137, 374149, 374159, 374173, 374177,
#429127, 429137, 429139, 429161, 429181, 429197, 429211, 429217,
#465077, 465079, 465089, 465107, 465119, 465133, 465151, 465161,
#481787, 481801, 481807, 481813, 481837, 481843, 481847, 481849,
#500197, 500209, 500231, 500233, 500287, 500299, 500317, 500321,
#530093, 530129, 530137, 530143, 530177, 530183, 530197, 530203,
#593777, 593783, 593839, 593851, 593863, 593869, 593899, 593903,
#648173, 648181, 648191, 648199, 648211, 648217, 648229, 648239,
#673019, 673039, 673063, 673069, 673073, 673091, 673093, 673109,
#675559, 675569, 675581, 675593, 675601, 675607, 675611, 675617,
#727483, 727487, 727499, 727501, 727541, 727561, 727577, 727589,
#747869, 747871, 747889, 747917, 747919, 747941, 747953, 747977,
#755317, 755329, 755333, 755351, 755357, 755371, 755387, 755393,
#795127, 795139, 795149, 795161, 795187, 795203, 795211, 795217,
#813311, 813343, 813361, 813367, 813427, 813443, 813493, 813499,
#830257, 830267, 830279, 830293, 830309, 830311, 830327, 830329,
#865591, 865597, 865609, 865619, 865637, 865639, 865643, 865661,
#873157, 873209, 873247, 873251, 873263, 873293, 873317, 873319,
#895801, 895813, 895823, 895841, 895861, 895879, 895889, 895901,
#921353, 921373, 921379, 921407, 921409, 921457, 921463, 921467,
#941617, 941641, 941653, 941663, 941669, 941671, 941683, 941701,
#946877, 946901, 946919, 946931, 946943, 946949, 946961, 946969,
#985499, 985519, 985529, 985531, 985547, 985571, 985597, 985601,
);
# Default stride for mode=write: one list entry picked at random at startup.
# An mmb= argument replaces it when write mode parses that option.
my $blockAdd = $primeList [int (rand (scalar @primeList))]; 

# Called with no arguments at all: print the usage text. doUsage() exits, so
# nothing below runs in that case.
doUsage() unless @ARGV;

# sample usages -- the first argument may ask for one of the canned scripts
# instead of running a test; both printers exit when they are done.
if ($ARGV[0] eq 'printXtermScript') {
    printXtermScript();
} elsif ($ARGV[0] eq 'printSilentScript') {
    printSilentScript();
}

# Walk the arguments once. Every recognised setting is stored and echoed back
# to the terminal; whatever matches none of the patterns is collected in
# @extraArgs.
for my $arg (@ARGV) {
    if ($arg =~ /^mode=(.+)$/) {
        $mode = $1;
        print "mode=$mode\n";
        next;
    }
    if ($arg =~ /^if=(.+)$/) {
        $inFile = $1;
        print "if=$inFile\n";
        next;
    }
    if ($arg =~ /^of=(.+)$/) {
        $device = $1;
        print "of=$device\n";
        next;
    }
    if ($arg =~ /^mmb=(.+)$/) {
        $minMaxAdd = $1;
        print "mmb=$minMaxAdd\n";
        next;
    }
    if ($arg =~ /^pt=(.+)$/) {
        $ptSelect = $1;
        print "pt=$ptSelect\n";
        next;
    }
    if ($arg =~ /^bs=(\d+)$/) {
        $blockSize = $1;
        print "bs=$blockSize\n";
        next;
    }
    if ($arg =~ /^silent$/) {
        $noOutput = 1;
        print "silent=$noOutput\n";
        next;
    }
    push (@extraArgs, $arg);
}

# -------------------------------------------------------------------------------
# from "man 2 open" on Linux:
# -------------------------------------------------------------------------------
# O_SYNC The file is opened for synchronous I/O. Any writes on the resulting file
# descriptor will block the  calling  process until the data has been physically
# written to the underlying hardware.
# -------------------------------------------------------------------------------
# Open the file under test as BLOCKDEV.
#
# The mode is checked first so that a missing or misspelt mode= prints the
# usage text before anything is created on disk (doUsage() exits).
unless (defined $mode && ($mode eq 'read' || $mode eq 'write')) {
    doUsage();
}

if ($mode eq 'read') {
    # Nothing is ever written through the handle in read mode, so a read-only
    # open is enough, and a missing input file is reported instead of being
    # created empty.
    doUsage() unless defined $inFile;
    sysopen (BLOCKDEV, $inFile, O_RDONLY)
        || die "couldn't open $inFile for O_RDONLY ($!)";
} else {
    # O_CREAT replaces the former "touch" shell-out: the file is still created
    # when absent, but the path no longer passes through a shell, so names
    # containing spaces or shell metacharacters work.
    sysopen (BLOCKDEV, $device, O_RDWR|O_SYNC|O_CREAT)
        || die "couldn't open $device for O_RDWR|O_SYNC ($!)";
}

# Switch both standard streams to autoflush. With buffered output the block
# numbers visible on the terminal could lag behind the blocks actually written.
# STDOUT comes last so that it is left as the selected default output handle.
foreach my $stream (\*STDERR, \*STDOUT) {
    select $stream;
    $| = 1;
}

# pick the pattern to write
#
# Each pattern is a short fragment that is expanded into numbered lines of the
# form "NNN:<fragment>\n". At least 800 lines are always generated (a little
# over 64K); more are appended when bs= is larger than that, because syswrite()
# writes no more data than the scalar actually holds and the tail of every
# block would otherwise never be written.
my $testPattern = "";
if ($ptSelect) {
    my %smallPatternFor = (
        digg   => "Digg_is_a_rockin'_place_to_work,_and_there_is_no_doubt_of_that._1234567890abcdef_",
        lucy   => "Lucy_Liu?_who_is_that_girl?_is_she_in_some_movie_or..._she_isn't_bad._Nice_Smile!",
        # the "@" is escaped: an unescaped "@+" inside a double-quoted string
        # is interpolated as the match-end array instead of being kept literally
        iotbit => "89:;<=>?ABCpdqEGHI-J!\@+K!!L#M_##NO&P'&&Q(''R)((S*))T+**U,++V-,,W.--X/..Y0//Z100[2",
        rand1  => "26e69955d29cefaac46d9b756f7e342c2cc9960fc28827478d8828c997dc00791d37e18a23d91fd7d",
        rand2  => "d1f632d83e91300c67479b8dedf8b4f1e8be45bab7260293fb1915f81b87e3fd4b1ec981ca89401af",
        rawk   => "RAWK!_I_love_testing_disks!_Pulling_the_power_when_they're_stressed_is_a rush_!!!",
        init   => "000000000000000000000000000000000000000000000000000000000000000000000000000000000",
        zero   => "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
                . "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
                . "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
                . "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
    );

    # An unrecognised pt= used to continue with an undefined fragment and
    # quietly produce a near-empty pattern; refuse it instead.
    unless (exists $smallPatternFor{$ptSelect}) {
        die "unknown pattern pt=$ptSelect (expected one of: "
          . join ('|', sort keys %smallPatternFor) . ")\n";
    }
    my $smallPattern = $smallPatternFor{$ptSelect};

    # The first 800 lines are the historic payload; keep appending lines after
    # that until the pattern covers a whole block.
    my $lineNum = 0;
    while ($lineNum < 800 || length ($testPattern) < $blockSize) {
        $lineNum++;
        $testPattern .= sprintf ('%03d', $lineNum) . ":$smallPattern\n";
    }
}

# Main dispatch.
#
# mode=write: write the chosen pattern block after block, stepping by blockAdd
#             inside the minBlock..maxBlock range, until the process is killed
#             or the power goes. Each block number is printed after its write
#             returns (unless "silent" was given).
# mode=read:  dump every block named on the command line to <blocknum>.block
#             and run md5sum on it so the checksums can be compared.
# Any other (or missing) mode prints the usage text.
if (defined $mode && $mode eq 'write') {
    unless ($testPattern) { doUsage(); }
    # if they pass this, parse it
    if (defined $minMaxAdd) {
        print " - Parsing minMaxAdd = $minMaxAdd\n";
        my ($newMin, $newMax, $newAdd) = split (/,/, $minMaxAdd);
        # A missing or non-numeric field would leave blockAdd undefined or
        # zero-valued, and the loop below would rewrite one block forever.
        foreach my $value ($newMin, $newMax, $newAdd) {
            unless (defined $value && $value =~ /^\d+$/) {
                die "mmb=$minMaxAdd must be minBlock,maxBlock,blockAdd (three non-negative integers)\n";
            }
        }
        ($minBlock, $maxBlock, $blockAdd) = ($newMin, $newMax, $newAdd);
    }

    my $blockSpan = $maxBlock - $minBlock;
    if ($blockSpan < 1) {
        die "maxBlock ($maxBlock) must be greater than minBlock ($minBlock)\n";
    }

    # syswrite() writes at most as many bytes as the pattern holds, so that is
    # the count a complete write must report.
    my $payloadLen = length ($testPattern);
    if ($payloadLen > $blockSize) { $payloadLen = $blockSize; }

    $blockNum = $minBlock;
    my $numBlocksOutput = 0;

    # keep going forever or until they pull the plug
    while (1) {
        sysseek (BLOCKDEV, $blockNum*$blockSize, 0) || die "couldn't seek to $blockNum * $blockSize ($!)";
        my $written = syswrite (BLOCKDEV, $testPattern, $blockSize);
        unless (defined $written) {
            die "couldn't syswrite() $blockSize bytes at offset=$blockNum ($!)";
        }
        if ($written != $payloadLen) {
            die "short syswrite() at offset=$blockNum: wrote $written of $payloadLen bytes";
        }
        unless ($noOutput) {
            print " $blockNum";
            $numBlocksOutput++;
            if ($numBlocksOutput > 100) {
                # start a fresh line with a ready-to-paste read command; the
                # block numbers printed next follow it as its arguments
                print "\n$0 mode=read if=$device bs=$blockSize";
                $numBlocksOutput = 0;
            }
        }
        #select(undef, undef, undef, 0.05);
        $blockNum += $blockAdd;
        # Fold back into the range. A single subtraction is not enough when
        # blockAdd is larger than the range itself (the block number would
        # then grow without bound), hence the modulo.
        if ($blockNum > $maxBlock) {
            $blockNum = $minBlock + ($blockNum - $minBlock) % $blockSpan;
        }
    }
} elsif (defined $mode && $mode eq 'read') {
    foreach my $arg (@extraArgs) {
        # Only plain block numbers are usable; anything else is skipped, which
        # also keeps stray text out of the file name built below.
        unless ($arg =~ /^\d+$/) {
            warn "skipping '$arg': not a block number\n";
            next;
        }
        my $readBlockData = '';
        sysseek (BLOCKDEV, $arg*$blockSize, 0) || die "couldn't seek to $arg * $blockSize ($!)";
        my $bytesRead = sysread (BLOCKDEV, $readBlockData, $blockSize);
        unless (defined $bytesRead) {
            die "couldn't sysread() $blockSize bytes at offset=$arg ($!)";
        }
        if ($bytesRead == 0) {
            die "couldn't sysread() $blockSize bytes at offset=$arg (block is at or beyond end of file)";
        }
        if ($bytesRead < $blockSize) {
            warn "block $arg: only $bytesRead of $blockSize bytes could be read\n";
        }

        my $blockFile = "$arg.block";
        open (my $blockOut, '>', $blockFile) || die "couldn't create $blockFile ($!)";
        binmode ($blockOut);
        print {$blockOut} $readBlockData;
        close ($blockOut) || die "couldn't finish writing $blockFile ($!)";

        # list form: md5sum is run directly, without a shell
        system ('md5sum', $blockFile) == 0 || warn "md5sum $blockFile did not succeed\n";
    }
    print "rm *.block #do this to remove temp .block files\n";
} else { doUsage(); }



# ----------------------------------------------------------------------
# subroutines
# ----------------------------------------------------------------------
# Print the command-line synopsis to STDOUT and exit with status 0.
#
# The synopsis lists every option the argument parser accepts. Square brackets
# mark options that have a default (bs=, of=, mmb=) or are plain switches
# (silent). Takes no arguments and never returns.
sub doUsage {
    print "First, make sure $prefixPath is a filesystem mounted sync and you have write access.\n\n";
    print "usage: $0 mode=write pt={digg|lucy|iotbit|rand1|rand2|rawk|init|zero} [bs=<blocksize>] [of=<fileName>] [mmb=<minBlock>,<maxBlock>,<blockAdd>] [silent]\n";
    print "   or: $0 mode=read  if=<fileName> [bs=<blocksize>] <blocknum> [<blocknum>...]\n";
    print "   or: $0 {printXtermScript|printSilentScript} (recommend XtermScript)\n";

    exit 0;
}


# Print a ready-to-run shell script (with instructions) that drives the test
# from a second machine through xterm + ssh, then exit with status 0.
#
# The script first starts six "init" writers, one per device file, and then six
# writers with the real patterns. Each device file keeps the same randomly
# chosen stride in both passes. Takes no arguments and never returns.
sub printXtermScript {
    # Block size used by every generated write command. The read-back
    # instruction must quote the same value: this sub runs before bs= is
    # parsed, so $blockSize still holds its 64K default here and reading with
    # it would look at the wrong offsets.
    my $scriptBlockSize = 4096;

    # one stride per device file
    my @randPrimes = map { $primeList [int (rand (scalar @primeList))] } 1 .. 6;

    print "# Instructions:\n";
    print "# Make sure $prefixPath exists on machine to test, or edit $0 to change \$prefixPath\n";
    print "# \n";
    print "# Put the following script on a machine that is not to be tested, and has passwordless login\n";
    print "# via SSH to the machine to test. Run it, wait for 3-5 minutes, then ctrl-c each window, saving\n";
    print "# the closeThisOneLast window for last. After you close it, a bunch more windows will open spewing\n";
    print "# random numbers. After a minute or two, pull the plug on the machine to be tested. Write down the\n";
    print "# last 10-15 numbers (copy/paste might be better) from each window, noting which window you get each\n";
    print "# set of numbers from.\n";
    print "# Login to the machine (now that it's powered up again) and run the script with mode=read, like this:\n";
    print "#   $0 mode=read bs=$scriptBlockSize if=...device01.dat <digg1> <digg2> <digg3> ... <diggN>\n";
    print "# If any MD5sum are unlike the others, you know the data is corrupt; there's a faulty config or lying\n";
    print "# piece of hardware. The .pl script must be in ~/tmp on the machine to be tested. Script follows.\n";
    print "\n\n";
    print "# change the following ENV variable to the hostname of the machine you want to test\n";
    print "testMachine='dbslave01'\n";
    print "# first do zeroes\n";
    foreach my $i (0 .. 5) {
        my $devNum = sprintf ('%02d', $i + 1);
        print "xterm -T init -geometry 120x20 -e 'ssh '\$testMachine' \"~/tmp/diskTest.pl mode=write bs=$scriptBlockSize pt=init of=$prefixPath/device$devNum.dat mmb=2000,$defaultScriptDevSz,$randPrimes[$i]\"' &\n";
    }
    print "xterm -T closeThisOneLast\n";
    print "\n";
    print "# then actual data\n";
    my @patterns = qw(digg lucy iotbit rand1 rand2 rawk);
    foreach my $i (0 .. $#patterns) {
        my $devNum = sprintf ('%02d', $i + 1);
        my $label  = sprintf ('%-6s', $patterns[$i]);   # padded so the columns line up
        # every writer after the first starts one second later than the one
        # before it; the first gets blanks of the same width instead
        my $delay  = $i == 0 ? ' ' x 11 : " sleep $i ; ";
        print "xterm -T $label -geometry 120x20 -e 'ssh '\$testMachine' \"$delay~/tmp/diskTest.pl mode=write bs=$scriptBlockSize pt=$label of=$prefixPath/device$devNum.dat mmb=2000,$defaultScriptDevSz,$randPrimes[$i]\"' &\n";
    }

    exit 0;
}

# Print the instructions for the terminal-only variant of the test (no xterm
# windows), then exit with status 0. Takes no arguments and never returns.
sub printSilentScript {
    # Twelve strides are drawn: slots 0-9 go to the ten background writers and
    # slot 10 is shared by the two device06 commands. Slot 11 is drawn but not
    # used.
    my @randPrimes = map { $primeList [int (rand (scalar @primeList))] } 1 .. 12;

    # the foreground command on device06, identical apart from its pattern
    my $foreground = sub {
        my ($pattern) = @_;
        return "  $0 mode=write bs=2048   pt=$pattern   of=$prefixPath/device06.dat mmb=2000,500000,$randPrimes[10] \n";
    };

    # background load: [pattern, device number], in the order they are printed
    my @stressJobs = (
        [ 'digg',   '01' ], [ 'lucy',   '02' ], [ 'iotbit', '03' ], [ 'rand1', '04' ], [ 'rand2', '05' ],
        [ 'lucy',   '01' ], [ 'iotbit', '02' ], [ 'rand1',  '03' ], [ 'rand2', '04' ], [ 'digg',  '05' ],
    );

    my @out;
    push (@out, "This is if you don't like the Xterm method, but I recommend the Xterm method over this. Instructions:\n");
    push (@out, "  #-- run the following first\n");
    push (@out, $foreground->('zero'));
    push (@out, "  #-- let that run for a couple of minutes, then ctrl-c it. next several are to put disks under stress\n");
    foreach my $slot (0 .. $#stressJobs) {
        my ($pattern, $devNum) = @{ $stressJobs[$slot] };
        my $label = sprintf ('%-6s', $pattern);   # padded so the columns line up
        push (@out, "  sleep 1 ; $0 mode=write bs=4096  pt=$label of=$prefixPath/device$devNum.dat mmb=2000,$defaultScriptDevSz,$randPrimes[$slot] silent &\n");
    }
    push (@out, "  #-- make sure via iostat that your I/O subsystem is pretty darned busy\n");
    push (@out, $foreground->('rawk'));
    push (@out, "  #-- pull the plug, reboot the machine, then run this...\n");
    push (@out, "  $0 mode=read  bs=2048   if=$prefixPath/device06.dat <block> (<block>...)\n");
    push (@out, "The list of <block>'s to pass to mode=read are the last few you see on your screen before the\n");
    push (@out, "power went out. Say 10 of them. They should all have identical md5sum's.\n");

    print join ('', @out);

    exit 0;
}