I'm trying to extract all the links from a string, and I don't know in advance how many links it contains.
This works fine, but it only extracts the first link on the page. Is it possible to loop over all the links, or do I need to rebuild it?
# Try the pattern a single time: with no /g flag only the first anchor in
# $content is examined. $1 is the href value, $2 the link text.
my $report = ($content =~ /<a href=\"(.+?)\">(.+?)<\/a>/i)
    ? "Url: $1<br>Text: $2"
    : "Nothing found";
print $report;
# Collect every anchor in $content in one pass. In list context a /g match
# returns all captures flattened, so @hrefs alternates: url, text, url, text...
#
# Changes from the earlier form of this snippet:
#  - m{...} replaces a non-ASCII delimiter character (likely a mangled '|'),
#    which is not a dependable regex delimiter once the file is saved as UTF-8.
#  - The greedy ".*" between "<a" and "href" is replaced by a scan that stays
#    inside one tag ([^>]*?). The greedy form ran to the end of the line and
#    backtracked to the LAST href, so several links on one line yielded one.
@hrefs = ($content =~ m{<a\s+(?:[^>]*?\s)?href\s*=\s*"(.+?)">(.+?)</a>}ig);
foreach $href (@hrefs) {
    print "$href<hr>";
}
But the next problem is that the link text and the URL are stored in different elements of the array, so the result looks like this:
link url #1
--------------------------------------------------------------
link text #1
--------------------------------------------------------------
link url #2
--------------------------------------------------------------
link text #2
And that isn't really what I want :/...
# Scan $content repeatedly: in scalar context a /g match resumes where the
# previous successful match ended, so each pass handles the next link.
$found = 0;
while ($content =~ /<a href=\"(.+?)\">(.+?)<\/a>/ig) {
    $found = 1;    # remember that at least one link was seen
    print "Url: $1<br>Text: $2";
}
print "Nothing found" unless $found;
# haven't tested this, but the key thing is the 'g' flag
# at the end of the regexp operator to make the match
# global
#!/usr/bin/perl
use strict;
use warnings;
use HTML::LinkExtractor;
use LWP::Simple qw(get);
# Fetch a page to test against. LWP::Simple's get() returns undef when the
# request fails, so stop here instead of handing the parser an undefined
# document.
my $url  = 'http://search.cpan.org/recent';
my $html = get($url);
die "Could not fetch $url\n" unless defined $html;

# Set up the parser. Class->new() is used rather than the indirect-object
# form "new Class()", which Perl can mis-parse.
my $LX = HTML::LinkExtractor->new();
$LX->strip(1);    # just anchor text, not entire tag
$LX->parse(\$html);

# Print "text -> href" for each <a> link the parser recorded.
for my $Link (@{ $LX->links }) {
    # only regular links
    next unless defined $Link->{tag} && $Link->{tag} eq 'a';

    my $href = $Link->{href};
    next unless defined $href;    # nothing to report without a target

    # NOTE(review): presumably an anchor can come back with no text (e.g. an
    # empty <a></a>); fall back to '' so print does not warn about undef.
    my $text = defined $Link->{_TEXT} ? $Link->{_TEXT} : '';

    print $text, " -> ", $href, "\n";
}
undef $LX;