This script runs a local backup to a mounted file system (ext4 or btrfs). It de-duplicates at the file level: the first backup copies everything, while each subsequent backup copies only the changed files and hard-links to the unchanged ones. RAID storage systems such as NetApp de-duplicate at the block level, but the snapshot paradigm is very similar.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
#!/bin/bash
#
# Snapshot-style backup of the root file system using rsync hard links.
#
# Usage: SCRIPT DEST
#   DEST  existing directory (normally a mounted backup volume). Snapshots are
#         written to DEST/<hostname>/back-<timestamp>, and DEST/<hostname>/current
#         is a symlink to the most recent usable snapshot.
#
# Each run hard-links unchanged files against "current" (rsync --link-dest),
# so only changed files consume new space. Old snapshots are then thinned:
#   * 11-90 days old:  keep Fridays and the last day of each month
#   * 91-365 days old: keep only the last day of each month
#
# Requires GNU date (--date) and GNU ln (-n).
#
# NOTE: only the trees listed in "excludes" are skipped. If DEST lives outside
# of them (e.g. not under /mnt or /media), add it to the list so the backup
# does not recurse into itself.

set -u

hostname=$(hostname)
now=$(date '+%Y-%m-%dT%H:%M:%S')

printf '\n\nBackup of %s on %s\n' "$hostname" "$(date)"

# Check args, set backupdir.
if [[ $# -lt 1 ]]; then
  echo "$0: missing path operand" >&2
  echo "Usage: $0 DEST" >&2
  exit 1
fi
backupdir=$1

# Refuse to run against a missing destination: if the backup volume is not
# mounted we must not silently create the tree somewhere else.
if [[ -z "$backupdir" || ! -d "$backupdir" ]]; then
  echo "$0: destination '$backupdir' is not an existing directory" >&2
  exit 1
fi

dest="$backupdir/$hostname"

# Make sure the per-host destination dir is there.
if ! mkdir -p -- "$dest"; then
  echo "$0: cannot create '$dest'" >&2
  exit 1
fi

# Trees that are volatile, virtual, or backed up elsewhere.
excludes=(
  --exclude /home
  --exclude /boot
  --exclude /proc
  --exclude /sys
  --exclude /media
  --exclude /dev
  --exclude /tmp
  --exclude /run
  --exclude /mnt
  --exclude /exports
  --exclude /var/lock
  --exclude /var/run
  --exclude='ExcludeFromBackup'
)

# Run the backup. --link-dest is resolved relative to the destination
# directory, so ../current refers to $dest/current.
rsync -aHA --stats --delete \
  "${excludes[@]}" \
  --link-dest=../current \
  / \
  "$dest/back-$now"
rsync_status=$?

case "$rsync_status" in
  0) ;;
  23 | 24)
    # Partial transfer (some files unreadable or vanished mid-run). This is
    # routine when copying a live system, so the snapshot is still used.
    echo "$0: warning: rsync exited with status $rsync_status (partial transfer)" >&2
    ;;
  *)
    # Leave "current" and all older snapshots untouched so a broken run can
    # neither become the hard-link base nor trigger pruning of good backups.
    echo "$0: rsync failed with status $rsync_status; 'current' not updated, nothing pruned" >&2
    exit "$rsync_status"
    ;;
esac

# Update the "current" soft link (-f/-n replace an existing link in place and
# also work on the very first run, when no link exists yet).
if ! ln -sfn -- "back-$now" "$dest/current"; then
  echo "$0: cannot update '$dest/current'" >&2
  exit 1
fi

#######################################
# Remove snapshots whose age in days lies in [first, last], keeping those
# taken on the last day of a month and, optionally, those taken on a Friday.
# Arguments:
#   $1 - snapshot directory (contains back-YYYY-MM-DD* entries)
#   $2 - youngest age in days to consider
#   $3 - oldest age in days to consider
#   $4 - "yes" to also keep Friday snapshots
#######################################
prune_range() {
  local dir=$1 first=$2 last=$3 keep_fridays=$4
  local daysago day next_dom

  for ((daysago = first; daysago <= last; daysago++)); do
    day=$(date --date="$daysago days ago" '+%Y-%m-%d')

    # If the following day is the 1st, this day is the end of a month.
    next_dom=$(date --date="$((daysago - 1)) days ago" '+%-d')
    if [[ "$next_dom" == "1" ]]; then
      continue
    fi

    # %u is the numeric ISO weekday (5 = Friday), unlike %a which changes
    # with the locale.
    if [[ "$keep_fridays" == "yes" && "$(date --date="$day" '+%u')" == "5" ]]; then
      continue
    fi

    # The trailing * must stay unquoted: it matches every snapshot taken on
    # that day. ${dir:?} aborts rather than expanding to "/back-...".
    rm -rf -- "${dir:?}"/back-"$day"*
  done
}

# Prune backups.
# After ten days we only keep weekly (Friday) and end-of-month snapshots.
prune_range "$dest" 11 90 yes

# After 90 days we only keep monthly (end-of-month) snapshots.
prune_range "$dest" 91 365 no