#!/perl/bin
use strict;

# stripdomains.pl 1.0, 3.Aug.1998
# by Kajetan Hinner, Sociology Dep., University of Rostock
# companion script to http://www.hinner.com/programs/tracedomains.pl
# please read that file (tracedomains.pl) for further details


# File-level state shared by the reading and filtering steps below.
# (The never-used scratch variables $i, $zeile and $domain were removed.)

my $file = "Domains.txt";		# sample infile
my $outfile = "Domains.weed";		# outfile, only containing valid domain names

my @linesread;			# contents of the infile, one element per line

my $domaincounter = 0;		# number of valid domains found in the infile


# Read the whole infile into @linesread in one slurp.
# Three-argument open with a lexical handle is used so that the file name is
# never parsed for mode characters and the handle cannot leak beyond this block.
{
    open(my $in, '<', $file) or die "kann $file nicht oeffnen: $!";

    @linesread = <$in>;

    close $in;
}

# Nothing was read (e.g. the infile is empty): there is nothing to filter.
die "error while reading $file\n" unless @linesread;


# Filter @linesread down to plain domain names of one top-level domain, write
# them to $outfile (one per line) and report how many were found.

# Top-level domain to keep: replace the "de" with your tld.
# A generic pattern such as \S{2,3} would also work, but it sometimes matches
# on unwanted occasions, like abbreviations etc.
my $tld = "de";

# A valid entry is a single whitespace-free token ending in ".<tld>".
# \Q...\E keeps any regex metacharacters in $tld literal.
my $domain_re = qr/\A\S*\.\Q$tld\E\z/;

open(my $out, '>', $outfile) or die "kann $outfile nicht oeffnen: $!";

foreach my $line (@linesread) {
    # chomp removes only a trailing newline; chop would also eat the last
    # character of an unterminated final line and so drop a valid domain.
    chomp $line;

    next if $line =~ /^#/;        # skip comments
    next if $line =~ /^\s*$/;     # skip lines consisting only of whitespace
    next if $line =~ /[\@A-Z]/;   # skip if there are any upper case letters or @
                                  # NOTE: this only works if your source provides
                                  # all domain names in lower case!

    $line =~ s/^\s+//;            # get rid of leading whitespace
    $line =~ s/\s+$//;            # get rid of trailing whitespace, including the
                                  # "\r" left over from CRLF line endings, so it
                                  # never ends up in the outfile

    if ($line =~ $domain_re) {
        print {$out} "$line\n" or die "kann nicht nach $outfile schreiben $!";
        $domaincounter++;
    }
}

# Buffered write errors only surface at close, so check it.
close($out) or die "kann $outfile nicht schliessen: $!";

print("Number of found domains $domaincounter\n");