#!/usr/bin/perl
#
# Cluester v0.1, written Hexacorn.com, 2012-05
#
# This is a simple script that attempts to divide a list of files
# into clusters. Each cluster is based on a timestamp.
# Each timestamp is normalized to 1 minute (60 seconds),
# so that grouping is easier (and it has a wider range).
#
# Usage:
#   perl cluester.pl file_list_from_fls
#
# The list from fls can be obtained via
#   fls -lrm <drive> -i <imgtype> -f <fstype> \\.\<drive> > <list>
# e.g.
#   fls -lrm f: -i raw -f ntfs \\.\f: > list
#
# Output (STDOUT): one paragraph per one-minute cluster; every line is
#   "<cluster minute> <file crtime> <crtime epoch> <path>"
# Progress and the banner go to STDERR.
#

use strict;
use warnings;

# Width of one cluster bucket, in seconds.
use constant BUCKET_SECONDS => 60;

# Clusters holding at least this many files are printed indented so that
# they stand out from the small ones when scrolling through the output.
use constant BIG_CLUSTER_SIZE => 10;
use constant BIG_CLUSTER_INDENT => ' ' x 22;

# The progress indicator is refreshed after roughly this many input bytes.
use constant PROGRESS_STEP => 65536;

# Index of the fields used from each '|'-separated input row. Layout, as
# described by the original author:
#   0 MD5, 1 name, 2 inode, 3 mode_as_string, 4 UID, 5 GID, 6 size,
#   7 atime, 8 mtime, 9 ctime, 10 crtime
use constant FIELD_NAME   => 1;
use constant FIELD_CRTIME => 10;

# Run the command-line tool only when executed directly; when the file is
# loaded with require/do the subs below are merely defined.
main(@ARGV) unless caller;

# Command-line entry point.
#   @args - command-line arguments; the first one is the fls listing.
# Dies when no filename is given or the file cannot be opened; exits with
# status -1 when the argument is not a plain file.
sub main {
    my @args = @_;

    $| = 1;

    print STDERR "
===================================================================
 Cluester v0.1, written by \@ Hexacorn.com, 2012-05
===================================================================
";

    my $target = shift @args
        or die "\n\nError: Gimme a filename!\n";

    if (!-f $target) {
        print "\n\nError: Don't know what to do with '$target'!\n";
        exit(-1);
    }

    print STDERR "Processing \"$target\" file... \n";

    my $clusters = _read_clusters($target);
    print STDERR "100%\n";

    _print_clusters($clusters);
    return;
}

# Read the listing and group its rows into one-minute clusters.
#   $target - path of the fls listing.
# Returns a hash ref: cluster timestamp string => array ref of entry strings.
# Dies if the file cannot be opened.
sub _read_clusters {
    my ($target) = @_;

    my $file_size     = -s $target;
    my $progress      = 0;
    my $progress_temp = 0;
    my %clusters;

    # 'or' (not '||') so that the check applies to open() itself.
    open my $fh, '<', $target
        or die "Can't open $target: $!\n";
    binmode $fh;

    while (my $line = <$fh>) {
        # Count raw bytes (line ending included) so the percentage is
        # measured against the same unit as -s.
        $progress      += length $line;
        $progress_temp += length $line;
        if ($progress_temp > PROGRESS_STEP) {
            print STDERR int(100 * $progress / $file_size) . "%\r";
            $progress_temp = 0;
        }

        $line =~ s/[\r\n]+//g;
        _add_entry(\%clusters, $line);
    }
    close $fh;

    return \%clusters;
}

# File one listing row under its one-minute cluster.
#   $clusters - hash ref being filled (cluster timestamp => array ref).
#   $line     - one row of the listing, line ending already removed.
# Returns 1 when the row was stored, 0 when it was skipped because it has
# no usable creation time (blank line, truncated row, non-numeric crtime).
sub _add_entry {
    my ($clusters, $line) = @_;

    my @row = split /\|/, $line;
    return 0 if @row <= FIELD_CRTIME;

    my $path   = $row[FIELD_NAME];
    my $crtime = $row[FIELD_CRTIME];
    return 0 if $crtime !~ /\A-?\d+\z/;

    my $stamp  = epoch2GMT($crtime);
    my $bucket = epoch2GMT(BUCKET_SECONDS * int($crtime / BUCKET_SECONDS));

    push @{ $clusters->{$bucket} }, "$stamp $crtime $path";
    return 1;
}

# Print all clusters in ascending timestamp order, a blank line after each.
#   $clusters - hash ref as returned by _read_clusters().
#   $out      - optional output filehandle; defaults to STDOUT.
sub _print_clusters {
    my ($clusters, $out) = @_;
    $out = \*STDOUT if !defined $out;

    # The keys are zero-padded UTC timestamps, so a plain string sort is
    # also a chronological sort.
    for my $bucket (sort keys %{$clusters}) {
        my $entries = $clusters->{$bucket};
        my $prefix
            = @{$entries} >= BIG_CLUSTER_SIZE ? BIG_CLUSTER_INDENT : '';

        print {$out} "$prefix$bucket $_\n" for @{$entries};
        print {$out} "\n";
    }
    return;
}

# Convert an epoch timestamp to a "YYYY-MM-DD hh:mm:ss" string in GMT.
#   $t - seconds since the epoch.
# gmtime (not localtime) is used so that the result matches the function
# name and does not depend on the time zone of the analysis machine.
sub epoch2GMT {
    my ($t) = @_;

    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime($t);

    return sprintf(
        "%d-%02d-%02d %02d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec
    );
}

# True value so the file can also be loaded with require.
1;