#!/perl/bin
use strict;
use warnings;

# rand.pl - Version 1.0 (Aug 3rd, 1998)
#
# Perl-Script by Kajetan Hinner. This program is Mailware, every user should write to Kajetan@Hinner.com
#
# Also note that there is no taint checking AT ALL. This could be a security problem, if you don't
# know the quality of input data fed into rand.pl.
#
#
# rand.pl was developed using Linux 2.1.x, ircII 4.4, Perl 5.004 and KDE 4. As editor for large files I recommend nedit.
#
#
# How this script works:
#
# This perl program was developed for tracedomains.pl (www.hinner.com/programs). But it's also
# useful to randomly select a certain number of lines from an ASCII input file.
# Note that the input file is read at once, so supply enough memory (RAM) in your machine.
#
# You must supply a file named Domains.txt. Then you type perl rand.pl <number>.
# <number> is the number of lines you want to select randomly from the input file Domains.txt.
# It must be a non-negative integer that does not exceed the number of lines in Domains.txt,
# because every line is selected at most once.
# Two random numbers are taken, the first two and the last character are cut off and those
# strings are concatenated. From the middle there is the actual random number taken. If it's
# too large (index for line of input file), it's divided by two until it fits. This specific
# line number of the input file (which must be named Domains.txt) is written to the outfile
# Domains.rand
#
#
# Author: Kajetan Hinner, University of Rostock, Sociology Dep., Kajetan@Hinner.com
#

use Math::TrulyRandom;

srand;    # set random seed

my $file     = "Domains.txt";     # input file, read completely into memory
my $randfile = "Domains.rand";    # output file, random sample of $file

# Scan command line arguments: exactly one non-negative integer is expected.
my $self = $0;
$self =~ s!^.*/!!;                # keep only the script name for the usage text
if (@ARGV != 1 || $ARGV[0] !~ /\A\d+\z/) {
    die "Usage: perl $self <number>\nInput file must be Domains.txt in current path!\n";
}
my $wanteddomains = $ARGV[0];     # number of random lines wanted

# Read the whole input file in one big slurp.
open(my $in, '<', $file) or die "can't open $file: $!";
my @linesread = <$in>;
close $in;
die "error while reading $file\n" unless @linesread;

# A final line without a newline would otherwise be glued to whichever
# selected line happens to be written after it.
$linesread[-1] .= "\n" unless $linesread[-1] =~ /\n\z/;

my $lines = @linesread;           # number of lines in the input file

# Every line is picked at most once, so asking for more lines than exist
# would keep the selection loop below running forever.
if ($wanteddomains > $lines) {
    die "$file has only $lines lines, cannot select $wanteddomains different lines\n";
}

open(my $out, '>', $randfile) or die "can't open $randfile: $!";

my @randomvalues;                 # built figure behind every accepted pick (for the statistics)
my %selected;                     # line indices that are already taken
my @index;                        # accepted line indices, in the order they were picked
my $picked = 0;                   # number of accepted picks so far
my $digits = length($lines);      # how many digits are cut out of the built figure

while ($picked < $wanteddomains) {
    my $value1 = truly_random_value();
    my $value2 = truly_random_value();
    print("Plain random values: $value1, $value2\n");

    # chop off first two and last character of each value, then concatenate
    my $value = trim_random($value1) . trim_random($value2);
    print "built figure: $value\n";

    # Cut the candidate index out of the figure, starting one third into it.
    my $index = substr($value, int(length($value) / 3), $digits);
    next unless $index =~ /\A\d+\z/;    # figure too short to yield a number: draw again

    # Halve until the candidate is a valid zero-based index (0 .. $lines - 1).
    while ($index >= $lines) {
        $index /= 2;
    }
    $index = int($index);               # drop the fraction left over from halving

    next if $selected{$index};          # line already taken: draw again

    $selected{$index} = 1;
    push @index,        $index;
    push @randomvalues, $value;         # save for later analysis
    $picked++;
    print("$picked: selected $index\n");
}

# Write the selected lines in the order in which they were picked.
for my $line_no (@index) {
    print {$out} $linesread[$line_no] or die "can't write to $randfile $!";
}
# Buffered write errors only show up when the handle is closed.
close($out) or die "can't close $randfile: $!";

my @randsorted = sort { $a <=> $b } @randomvalues;    # sort numerically ascending
print("Some statistical information about used random values\n");
if (@randsorted) {                                    # nothing to report for an empty sample
    my $min    = $randsorted[0];
    my $max    = $randsorted[-1];
    my $median = $randsorted[ int(@randsorted / 2) ];
    print("min: $min, max: $max, median: $median\n");
}

# Return $raw without its first two and its last character.
# Values shorter than three characters have no middle part and yield ''.
sub trim_random {
    my ($raw) = @_;
    return '' if length($raw) < 3;
    return substr($raw, 2, -1);
}