Here's the backup system I used to create minimal-space daily snapshots using hardlinks and rsync. I wrote archive.pl to replace cp -al, since the cp on OS X doesn't behave with linux device files, and it generally misbehaved.
All you really need to do is edit the HOSTS entry in run_backups, and possibly change the path in BACKUPDIR & ARCHIVE_PL, then make the scripts executable and call run_backups once a day (preferably from cron). Eventually you'll want to clean the archives up, too. You can create files in the EXCLUDE_FILE_PATH directory named hostname.excludes, following the format rsync expects for its --exclude-from option.
The script expects to be run by root, and expects to be able to ssh to the backed-up hosts without a password. Around 100 lines down is a perl oneliner that finds the reiser, ext2 and ext3 filesystems with a regexp. If you're running something else, you'll have to add it to the regexp.
I'm planning to get a bugzilla set up and bundle these things together "shortly" - but that could mean months. Email me (aardback@dannysauer.com) until then. :)
|
#!/bin/bash
#
# backup.sh - Danny Sauer, 3/2003
# this needs run as root in order to preserve permissions and make device nodes
# also, each host needs root's private key in its .ssh/authorized_keys
#
# Jan-16/2004
# add -H and --copy-unsafe-links (whoops)
#
# Jan-13/2005
# remove copy-unsafe-links, because it copies them as files; symlinks
# are merely recreated when that arg's left out (and when --safe-link"
# is also omitted)
# modify to get a list of local mounts, and sync each mount point
# individually, using -x. This is to avoid copying NFS-mounted dirs
# that are already backed up elsewhere :) - and should reduce the amount
# of memory required, by reducing the number of files in memory when
# syncing with the larger servers (esp www)
# add /sys to list of excluded dirs
# move rsync args to a separate variable
# fix date of last change (said 2003, but was actually 2004!)
# print out free space before and after, to get an idea of space usage
# put source and destination in variables, to self-document a bit better
#
# Jan-14/2005
# Add local host handling - to back up the backup server (presently gutenberg)
# Commented it out, until I can figure out a good way to exclude the
# $BACKUPDIR from being backed up - that'd be a whole heap of no good...
# Realised that doing subdirs would delete everything below that tree, so
# if you do / and /usr is next, it will have removed /usr before syncing
# it, and therefore will break all of the hardlinks. Doh.
#
# Feb-22/2005
# Calculate the directories to be included and excluded dynamically with a
# perl one-liner. It's not super smart, but it should handle most cases.
# It'll fail if a directory to be backed up is mounted below a directory that
# is not to be backed up, and there's a directory between them, but how
# likely is that? --exclude /tmp --include /tmp/path/keepme will fail. So
# just don't do that!
# Add ability to read per-host exclude files
# Note: per-host excludes means I can back up localhost!
# Note: they're called "${EXCLUDE_FILE_PATH}/${HOST}.excludes"
# Cleaned up rsync argument handling a little (*I* think it's cleaner)
#
#
# hosts to back up, separated by spaces
HOSTS="calypso router www dev dialin web2 gutenberg"
# full paths to the external programs we rely on
RSYNC="/usr/bin/rsync"
ARCHIVE_PL="/root/archive.pl"
DATE="/bin/date"
# date stamp used to name today's snapshot directory
TODAY=$($DATE +'%Y.%m.%d')
# where the live mirrors live, and where dated snapshots are kept
BACKUPDIR="/mnt/backup"
ARCHIVEDIR="$BACKUPDIR/archive"
# per-host exclude files are looked for here as <hostname>.excludes
EXCLUDE_FILE_PATH="/etc/rsync"
# arguments passed to rsync on every run
RSYNC_ARGS="-q -a -e ssh -H --delete --delete-excluded --ignore-errors"
# verify that our output directories exist before using them
if [ ! -d "$BACKUPDIR" ]; then
    # fixed typo: this message previously echoed $BACKLUPDIR (undefined)
    echo "creating '$BACKUPDIR'"
    mkdir -p "$BACKUPDIR"
fi
if [ ! -d "$ARCHIVEDIR" ]; then
    echo "creating '$ARCHIVEDIR'"
    mkdir -p "$ARCHIVEDIR"
fi
# record the free space before today's run, for comparison at the end
echo -n "before: "
/usr/bin/df -m "$ARCHIVEDIR"
# back up each host: snapshot the current mirror with hardlinks (archive.pl),
# then rsync the live host over the mirror so only changed files cost space
for HOST in $HOSTS; do
    echo "backing up '$HOST':"
    OUT="$ARCHIVEDIR/$TODAY/$HOST"
    # archive.pl needs the top output dir to exist and be empty
    if [ -d "$OUT" ]; then
        if rmdir "$OUT" && mkdir -p "$OUT"; then
            # rmdir only succeeds on an empty directory, so no harm done
            echo -e "\twarn: recreated $OUT"
        else
            echo -e "\t$OUT exists and is non-empty - there will be problems"
            echo -e "\ttry 'rm -r $OUT' and run again"
            echo -e "\tskipping backup of $HOST!"
            continue # it's probably best to not do anything with this host
        fi
    else
        mkdir -p "$OUT"
    fi
    # make today's hardlink snapshot of the current mirror
    echo -e "\tarchiving\t($($DATE +'%T'))"
    $ARCHIVE_PL "$BACKUPDIR/$HOST" "$OUT"
    # build the include/exclude list from the remote mount table: only
    # local reiserfs/ext2/ext3 mounts are backed up.  The options are
    # collected in a bash array - the old string-building approach left
    # literal quote characters inside the arguments rsync received.
    EXTRA_ARGS=()
    while read -r arg; do
        EXTRA_ARGS+=("$arg")
    done < <(ssh "$HOST" mount | \
        perl -lane 'print((($F[4] =~ /^(?:reiserfs|ext[23])$/) ? "--include" : "--exclude") . "=$F[2]")')
    # explicitly exclude some common temp dirs
    EXTRA_ARGS+=(--exclude='/tmp/*')
    EXTRA_ARGS+=(--exclude='/var/tmp/*')
    EXTRA_ARGS+=(--exclude='/usr/tmp/*')
    EXTRA_ARGS+=(--exclude='/var/cache/*')
    EXTRA_ARGS+=(--exclude='*~') # editor backup files - they waste space :)
    # include a host-specific exclude file, if one exists and is readable
    EXCLUDEFILE="${EXCLUDE_FILE_PATH}/${HOST}.excludes"
    if [[ -f $EXCLUDEFILE && -r $EXCLUDEFILE ]]; then
        EXTRA_ARGS+=(--exclude-from="$EXCLUDEFILE")
    fi
    # synchronize the mirror directory with the live server
    echo -e "\tsyncing\t($($DATE +'%T'))"
    $RSYNC $RSYNC_ARGS "${EXTRA_ARGS[@]}" "root@${HOST}:/" "${BACKUPDIR}/$HOST"
    # print time of completion
    echo -e "\tdone\t($($DATE +'%T'))"
done
# report free space again now that today's snapshots exist
printf 'after: '
/usr/bin/df -m "$ARCHIVEDIR"
|
#!/usr/bin/perl -w
# archive.pl - recreate a directory tree using hardlinks; a replacement
# for "cp -al", which misbehaves with linux device files on OS X.
# usage: archive.pl source_dir destination_dir
use strict;
# external copy program, used only for device nodes, pipes and sockets
my $cp_command = '/usr/bin/cp';
sub archive($$);
sub cp($$);
my $source = shift or die "usage: $0 source_dir destination_dir\n";
my $dest = shift or die "usage: $0 source_dir destination_dir\n";
unless( -d $source ){
    die "source must be a directory\n";
}
# the destination normally already exists (the backup script pre-creates
# it), so only die when it's missing AND we can't create it ourselves
-d $dest or mkdir($dest) or die "can't mkdir '$dest': $!\n";
archive($source, $dest);
1;
# copy one filesystem entry from $s to $d
# note - symlinks have no permissions, hardlinks have the same permissions
# as the source file, and "cp -a" preserves permissions. Don't use
# "cp -p", though - must use at least "cp -pR" to copy device nodes
sub cp($$){
    my($s, $d) = @_;
    # the -l test must be first: it does an lstat(), and the later file
    # tests on "_" reuse that buffer so the link itself is examined
    if( -l $s ){
        # recreate the symlink with the same target
        symlink(readlink($s), $d)
            or warn "can't symlink '$d': $!\n";
    }elsif( -p _ || -S _ || -b _ || -c _ ){
        # pipes, sockets and device nodes: let cp -a recreate the node.
        # list-form system() bypasses the shell, so filenames containing
        # spaces or metacharacters are passed through safely
        system($cp_command, '-a', $s, $d) == 0
            or warn "cp failed for '$s' (status $?)\n";
    }else{
        # regular file: hardlink it so the snapshot costs no extra space
        link($s, $d)
            or warn "can't link '$s' to '$d': $!\n";
    }
}
# walk the source tree, mirroring directories and cp()ing everything else.
# readdir is slurped into @fs rather than iterated with while(readdir) -
# the recursive calls would otherwise clobber the shared directory handle
sub archive($$){
    my($s, $d) = @_;
    opendir(S, $s) or die "can't opendir '$s': $!";
    my @fs = readdir S;
    closedir S;
    foreach my $f (@fs){
        next if($f eq '.' || $f eq '..');
        # -l must run first so perl does an lstat(), not a stat(); a
        # symlink to a directory would otherwise pass -d and be descended
        # into instead of being recreated as a link
        if((! -l "$s/$f") && -d _){
            # pull mode/owner/group from the stat buffer in one call
            # (previously this stat()ed three separate times)
            my ($mode, $uid, $gid) = (stat(_))[2, 4, 5];
            mkdir("$d/$f") or warn "can't mkdir '$d/$f': $!\n";
            chmod($mode & 07777, "$d/$f");
            chown($uid, $gid, "$d/$f");
            archive("$s/$f", "$d/$f");
        }else{
            # anything non-directory (file, symlink, node) goes through cp()
            cp("$s/$f", "$d/$f");
        }
    }
}