#!/bin/sh

# Copyright (C) 2010, Jeremy Laidman
# under the GNU General Public License.
# For details: http://www.gnu.org/licenses/gpl.txt

# Version 0.2 - 23 Sep 2010

# Look for files in a source directory that have the same relative name and
# the same content (verified using an md5 checksum) as a file in a target
# directory, and replace each such source file with a hard link to the
# target file.

# Give me source and target directory roots.  The roots must be
# corresponding, so that files are in the same place under both;
# otherwise they won't be found.

DEBUG=0
VERBOSE=0

# die MESSAGE...
# Print MESSAGE on stderr and terminate with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# debug LEVEL MESSAGE...
# Print MESSAGE on stderr when DEBUG is at least LEVEL.  Always returns 0 so
# it can safely be the last command of a branch or loop body.
debug() {
  dbg_level=$1
  shift
  if [ "$DEBUG" -ge "$dbg_level" ]; then
    printf '%s\n' "$*" >&2
  fi
  return 0
}

# same_file FILE1 FILE2
# Return 0 when both files have the same content, 1 otherwise.
# A cheap `sum` is compared first; only when it matches is `md5sum` used to
# confirm.  Data is fed on stdin so the file name never appears in the
# checksum output, and an empty checksum (unreadable file, failed command)
# is never treated as a match.
same_file() {
  sf_first=$(sum <"$1")
  sf_second=$(sum <"$2")
  debug 3 "Sums: $sf_first $sf_second"
  if [ -z "$sf_first" ] || [ "$sf_first" != "$sf_second" ]; then
    return 1
  fi

  sf_first=$(md5sum <"$1" | { read -r sf_hash sf_rest; printf '%s\n' "$sf_hash"; })
  sf_second=$(md5sum <"$2" | { read -r sf_hash sf_rest; printf '%s\n' "$sf_hash"; })
  debug 3 "Sums: $sf_first $sf_second"
  if [ -z "$sf_first" ] || [ "$sf_first" != "$sf_second" ]; then
    return 1
  fi

  debug 3 "Same file"
  return 0
}

# same_inode FILE1 FILE2
# Return 0 when `ls -i` reports the same inode number for both files,
# 1 otherwise.  Dies when an inode number cannot be determined.
same_inode() {
  si_first=$(ls -i "$1" | { read -r si_ino si_rest; printf '%s\n' "$si_ino"; })
  si_second=$(ls -i "$2" | { read -r si_ino si_rest; printf '%s\n' "$si_ino"; })
  debug 3 "Inodes: $si_first $si_second"
  if [ -z "$si_first" ] || [ -z "$si_second" ]; then
    die "Unable to find inodes for '$1' and '$2'"
  fi
  if [ "$si_first" = "$si_second" ]; then
    debug 3 "Same inode"
    return 0
  fi
  return 1
}

# linkify_it SOURCE DESTINATION
# Replace SOURCE with a hard link to DESTINATION.
# SOURCE is first moved aside to a temporary name in the same directory so
# that it can be put back if the link cannot be created.  Dies on failure.
linkify_it() {
  li_src=$1 # source
  li_dst=$2 # destination
  li_tmp=$(dirname "$li_src")/.linkify.$(basename "$li_src").$$

  mv "$li_src" "$li_tmp" || die "Unable to rename file, bailing: '$li_src'"
  if ! ln "$li_dst" "$li_src"; then
    # Put the original back rather than leaving it under the temp name.
    mv "$li_tmp" "$li_src" ||
      die "Unable to create hard link or restore original (kept as '$li_tmp'), bailing: '$li_src'"
    die "Unable to create hard link, bailing: '$li_src'"
  fi
  # The new directory entry shares the destination's inode, so it already
  # carries its timestamp, mode and ownership; nothing needs copying onto
  # the temp file before it is discarded.
  rm -f "$li_tmp" || die "Unable to clean up temp file, bailing: '$li_tmp'"
}

# usage
# Print a short usage summary on stdout.
usage() {
  printf 'Usage: %s [ -d | -v | -h ] source-path dest-path\n\t-d = debug\n\t-v = verbose\n\t-h = help\n' "$0"
}

# link_matching_files
# Read relative file names (one per line) on stdin.  For each name that is
# also a regular file under SRCDIR, and is not already the same inode as the
# file under DSTDIR, compare contents and linkify the source file on a match.
# Returns 0 after all names are processed; linkify_it/same_inode may die.
link_matching_files() {
  FILECOUNT=0
  LINKCOUNT=0
  while IFS= read -r FILE; do
    if [ "$VERBOSE" -gt 0 ]; then
      printf '%s/%s\r' "$LINKCOUNT" "$FILECOUNT" # progress ticker
    fi
    FILECOUNT=$((FILECOUNT + 1))
    debug 3 "Maybe $FILE"
    if [ -f "$SRCDIR/$FILE" ]; then
      debug 2 "Candidate: $FILE"
      if same_inode "$DSTDIR/$FILE" "$SRCDIR/$FILE"; then
        debug 1 "Same inode, already been linkified: '$FILE'"
      else
        debug 2 "Different inode detected."
        if same_file "$DSTDIR/$FILE" "$SRCDIR/$FILE"; then
          debug 1 "Same file detected with different inodes"
          if [ "$VERBOSE" -gt 0 ]; then
            printf "'%s'...\n" "$FILE" >&2
          fi
          linkify_it "$SRCDIR/$FILE" "$DSTDIR/$FILE"
          LINKCOUNT=$((LINKCOUNT + 1))
        fi
      fi
    fi
  done
  return 0
}

# get switches
while true; do
  case "${1-}" in
    -d | --debug)
      DEBUG=$((DEBUG + 1))
      shift
      ;;
    -v | --verbose)
      VERBOSE=$((VERBOSE + 1))
      shift
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    -*)
      die "Invalid switch: $1"
      ;;
    *)
      break
      ;;
  esac
done

# now we should only have the two paths left
[ -n "${2-}" ] || die "Specify source and target directory"
[ -z "${3-}" ] || die "Specify source and target directory"

SRCDIR="$1"
DSTDIR="$2"

if [ "$DEBUG" -gt 0 ]; then
  VERBOSE=1 # debug implies verbose
fi
debug 1 "DEBUG is $DEBUG, SRC=$SRCDIR, DST=$DSTDIR"

[ -d "$SRCDIR" ] || die "Invalid source directory $SRCDIR"
[ -d "$DSTDIR" ] || die "Invalid target directory $DSTDIR"
(cd "$SRCDIR") || die "Unable to cd to source directory"
(cd "$DSTDIR") || die "Unable to cd to target directory"

if [ "$VERBOSE" -gt 0 ]; then
  printf '%s\n' "Linkifying '$SRCDIR' using '$DSTDIR'"
fi

# The file list is produced relative to DSTDIR in a subshell, so the working
# directory of the loop is unchanged and relative SRCDIR/DSTDIR keep working.
# The loop may run in a pipeline subshell, where die only ends that
# subshell; "|| exit 1" carries the failure out to the script itself.
(cd "$DSTDIR" && find . -type f) | link_matching_files || exit 1

if [ "$VERBOSE" -gt 0 ]; then
  printf '\n' # newline after ticker
fi
exit 0