Perl NAR Database category parser

From MetaBase

Jump to: navigation, search
#!/usr/bin/perl -w
#
# NAR Database category parser.
#
# Reads an HTML page (files named on the command line, or STDIN) and scans
# the region that starts at the "NAR Database Categories List" <h1> heading
# and ends at the "<!-- end body -->" comment.  For every category and
# sub-category <div> found there, a block of wiki text is printed to STDOUT,
# wrapped in <CatPageStartHere> / <CatPageEndHere> marker lines (presumably
# consumed by a later page-splitting step -- confirm against the caller).
#
# Lines inside the region that are neither recognised nor blank are
# reported on STDERR together with their input line number.

use strict;
use warnings;

# Print one wiki category page.
#   $name     - title of the (sub-)category
#   $url_path - path below http://www.oxfordjournals.org/nar/database/
#   $label    - human readable id, e.g. "Category No. 3"
#   $parent   - wiki category the new page is filed under
sub emit_cat_page {
    my ($name, $url_path, $label, $parent) = @_;

    print "<CatPageStartHere>\n",
          "'''Category:NARDatabase:${name}'''\n",
          "'''${name}''' is [http://www.oxfordjournals.org/nar/database/${url_path} NAR Database ${label}]\n\n",
          "[[${parent}|${name}]]\n",
          "<CatPageEndHere>\n";
    return;
}

my $in_body = 0;    # true once the list heading has been seen
my $category;       # most recent top-level category; parent of sub-categories

LINE:
while (my $line = <>) {

    # Skip everything up to and including the heading that opens the list.
    # The heading itself carries no data, so it must not reach the
    # "unrecognised line" warning at the bottom of the loop.
    if (!$in_body) {
        next LINE
            unless $line =~ m{^<h1 class="summary">NAR Database Categories List</h1>};
        $in_body = 1;
        next LINE;
    }

    last LINE if $line =~ m{^<!-- end body -->};

    # Patterns are matched without /x, so the spaces (including the three
    # leading ones) are literal and must stay exactly as they are.
    if ($line =~ m{^   <div class="listcategory"> <a href="/nar/database/cat/(\d+)"> (.*?) </a> </div>}) {
        my $cat_id = $1;
        $category  = $2;

        emit_cat_page(
            $category,
            "cat/$cat_id",
            "Category No. $cat_id",
            'Category:NARDatabase',
        );
    }
    elsif ($line =~ m{^   <div class="listsubcategory"> <a href="/nar/database/subcat/(\d+)/(\d+)"> (.*?) </a> </div>}) {
        my ($cat_id, $sub_id, $sub_category) = ($1, $2, $3);

        # A sub-category needs a parent; without one the generated link
        # would point at the malformed "Category:NARDatabase:".
        if (!defined $category) {
            warn "Sub-category before any category, skipped. $. : ", $line;
            next LINE;
        }

        emit_cat_page(
            $sub_category,
            "subcat/$cat_id/$sub_id",
            "Sub-Category No. $cat_id/$sub_id",
            "Category:NARDatabase:$category",
        );
    }
    elsif ($line =~ m{^   <div class="listpaper"> <a href="/nar/database/summary/\d+"> .*? </a> </div>}) {
        # Individual paper entries are recognised but produce no output.
    }
    elsif ($line =~ /^\s+$/) {
        # Whitespace-only lines are ignored.
    }
    else {
        warn "What? $. : ", $line;
    }
}
Personal tools