[perl]
#!/usr/bin/perl
# Print the href value of every <a> element found in a local HTML file.
# Output is one "ANCHOR: <value>" line per href attribute.
use strict;
use warnings;
use HTML::LinkExtor;

my $FILENAME = 'file.html';

# Deliberately left undefined, which matches what the script passed before.
# NOTE(review): per the HTML::LinkExtor docs, supplying a base URL here makes
# the extracted links absolute -- set it if that is what you want.
my $base_url;

my $parser = HTML::LinkExtor->new(undef, $base_url);

# parse_file returns undef (with the reason in $!) when the file cannot be
# opened; fail loudly instead of silently printing nothing.
defined $parser->parse_file($FILENAME)
    or die "Cannot parse '$FILENAME': $!\n";

# Each entry is an array reference: the element type followed by
# attribute-name / attribute-value pairs.
for my $link ($parser->links) {
    my ($elt_type, @attrs) = @$link;

    # Only anchor elements are of interest; skip everything else up front.
    next unless $elt_type eq 'a';

    while (@attrs) {
        # extract the next attribute and its value
        my ($attr_name, $attr_value) = splice(@attrs, 0, 2);
        print "ANCHOR: $attr_value\n" if $attr_name eq 'href';
    }
}
[/perl]
Offline Extractor
[spadixbd.com...]
[focalmedia.net...]