#!/perl/bin
use strict;
use warnings;

# stripdomains.pl 1.0, 3.Aug.1998
# by Kajetan Hinner, Sociology Dep., University of Rostock
# belongs to http://www.hinner.com/programs/tracedomains.pl
# please read this file for further details
#
# Purpose: read the infile, keep every line that consists solely of a
# domain name under the configured TLD (and contains no upper-case letter
# or '@'), and write those lines to the outfile, one per line. The number
# of lines written is printed to STDOUT at the end.
#
# Dies if the infile cannot be opened or yields no lines, or if the outfile
# cannot be opened, written, or closed.

my $domaincounter = 0;            # number of valid domains in infile
my @linesread;                    # contents of the infile, one element per line
my $file    = "Domains.txt";      # sample infile
my $outfile = "Domains.weed";     # outfile, only containing valid domain names

# Three-argument open with lexical handles: the mode can never be confused
# with part of the file name, and the handles are scoped to this script.
open(my $in_fh, '<', $file) or die "kann $file nicht oeffnen: $!";
@linesread = <$in_fh>;            # read whole file in one big slurp
close $in_fh;

# The infile is validated before the outfile is opened, so an empty or
# unreadable infile never truncates an existing outfile.
die "error while reading $file\n" unless @linesread;

open(my $out_fh, '>', $outfile) or die "kann $outfile nicht oeffnen: $!";

foreach my $line (@linesread) {
    # chomp (not chop): only strip a trailing newline, so a last line
    # without "\n" does not lose its final character.
    chomp $line;

    next if $line =~ /^#/;        # skip comments
    next if $line =~ /^\s*$/;     # skip lines consisting only of whitespace

    # Skip if there are any upper-case letters or '@'.
    # NOTE: this only works if your source provides all unwanted
    # domain names in upper case!
    next if $line =~ /[\@A-Z]/;

    $line =~ s/^\s+//;            # get rid of leading whitespace

    # To match your domain name replace the "de" with your tld.
    # You can also write \S{2,3} but this sometimes matches on unwanted
    # occasions, like abbreviations etc.
    if ($line =~ /^\S*\.de\s*$/) {
        # print "Match: '$line'\n";
        print {$out_fh} "$line\n"
            or die "kann nicht nach $outfile schreiben $!";
        $domaincounter++;
    }
}

# Buffered write errors (e.g. disk full) may only surface at close time.
close($out_fh) or die "error while closing $outfile: $!";

print "Number of found domains $domaincounter\n";