dsp77
12-08-2010, 02:05 PM
I'm trying to crawl a specific website for links and show them at the end. The problem I'm facing is that it only shows the links from the single page I submit, not from all the pages of the website. I tried several loops with no success; please give me some advice.
Here is the code:
<?php
if (isset($_POST['Submit'])) {
/**
 * Collect the hyperlinks of the HTML document found at $link.
 *
 * Only the single document at $link is read; linked pages are not followed.
 *
 * @param string $link URL or path that file_get_contents() can read.
 * @return array Map of href attribute value => node value of the anchor's
 *               first child ('' for an empty anchor). An empty array is
 *               returned when the document cannot be fetched or is empty.
 */
function getLinks($link)
{
    $ret = array();

    /*** fetch first: a failed or empty download must never reach loadHTML(),
         which raises a ValueError on an empty string in PHP 8 ***/
    $html = @file_get_contents($link);
    if ($html === false || trim($html) === '') {
        return $ret;
    }

    $dom = new DOMDocument;
    /*** configure before loading; assigning this after loadHTML() cannot
         influence parsing that has already happened ***/
    $dom->preserveWhiteSpace = false;

    /*** collect markup errors internally instead of emitting warnings,
         then restore whatever setting the caller had ***/
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    foreach ($dom->getElementsByTagName('a') as $tag) {
        /*** named anchors (<a name="...">) carry no link ***/
        if (!$tag->hasAttribute('href')) {
            continue;
        }
        /*** an empty <a href="..."></a> has no first child to read ***/
        $first = $tag->firstChild;
        $ret[$tag->getAttribute('href')] = ($first !== null) ? $first->nodeValue : '';
    }

    return $ret;
}
/*** the page to scan; the form may be posted without a usable address ***/
$link = (isset($_POST['address']) && is_string($_POST['address'])) ? trim($_POST['address']) : '';
/*** everything echoed below is user- or page-controlled, so escape it ***/
$safeLink = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
$base = parse_url($link);

if (!is_array($base) || !isset($base['scheme'], $base['host'])
    || !in_array(strtolower($base['scheme']), array('http', 'https'), true)) {
    /*** the address is handed to file_get_contents(); refusing anything
         but http(s) keeps it from reading local files or other wrappers ***/
    echo 'Please enter a full http:// or https:// address';
} else {
    /*** get the links ***/
    $urls = getLinks($link);

    /*** check for results ***/
    if (count($urls) > 0) {
        /*** pieces of the scanned address used to complete relative links ***/
        $origin = $base['scheme'] . '://' . $base['host'] . (isset($base['port']) ? ':' . $base['port'] : '');
        $path   = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';
        $dir    = substr($path, 0, strrpos($path, '/') + 1);
        $page   = $origin . $path . (isset($base['query']) ? '?' . $base['query'] : '');

        foreach ($urls as $key => $value) {
            /*** purely numeric hrefs arrive as integer array keys ***/
            $key = trim((string) $key);
            $internal = true;

            if (preg_match('/^[a-z][a-z0-9+.-]*:/i', $key) || strncmp($key, '//', 2) === 0) {
                /*** absolute or protocol-relative: internal only when it is
                     an http(s) link pointing at the scanned host ***/
                $target = parse_url($key);
                $internal = is_array($target) && isset($target['host'])
                    && strcasecmp($target['host'], $base['host']) === 0
                    && (!isset($target['scheme'])
                        || in_array(strtolower($target['scheme']), array('http', 'https'), true));
                $absolute = (strncmp($key, '//', 2) === 0) ? $base['scheme'] . ':' . $key : $key;
            } elseif ($key === '' || $key[0] === '#') {
                /*** empty href or fragment: stays on the scanned page ***/
                $absolute = $page . $key;
            } elseif ($key[0] === '?') {
                /*** query-only link: same path, different query ***/
                $absolute = $origin . $path . $key;
            } elseif ($key[0] === '/') {
                /*** root-relative link ***/
                $absolute = $origin . $key;
            } else {
                /*** relative to the scanned page's directory
                     ("../" segments are shown as written, not collapsed) ***/
                $absolute = $origin . $dir . $key;
            }

            $shown = htmlspecialchars($absolute, ENT_QUOTES, 'UTF-8');
            if ($internal) {
                echo '<span style="color:BLUE;">' . $shown . ' - internal</span><br >';
            } else {
                echo '<span style="color:RED;">' . $shown . ' - external</span><br >';
            }
        }
    } else {
        echo "No links found at $safeLink";
    }
}
}
?>
<br /><br />
<form action="" method="post" enctype="multipart/form-data" name="link">
<input name="address" type="text" value="" />
<input name="Submit" type="Submit" />
</form>
Here is the code:
<?php
if (isset($_POST['Submit'])) {
/**
 * Collect the hyperlinks of the HTML document found at $link.
 *
 * Only the single document at $link is read; linked pages are not followed.
 *
 * @param string $link URL or path that file_get_contents() can read.
 * @return array Map of href attribute value => node value of the anchor's
 *               first child ('' for an empty anchor). An empty array is
 *               returned when the document cannot be fetched or is empty.
 */
function getLinks($link)
{
    $ret = array();

    /*** fetch first: a failed or empty download must never reach loadHTML(),
         which raises a ValueError on an empty string in PHP 8 ***/
    $html = @file_get_contents($link);
    if ($html === false || trim($html) === '') {
        return $ret;
    }

    $dom = new DOMDocument;
    /*** configure before loading; assigning this after loadHTML() cannot
         influence parsing that has already happened ***/
    $dom->preserveWhiteSpace = false;

    /*** collect markup errors internally instead of emitting warnings,
         then restore whatever setting the caller had ***/
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    foreach ($dom->getElementsByTagName('a') as $tag) {
        /*** named anchors (<a name="...">) carry no link ***/
        if (!$tag->hasAttribute('href')) {
            continue;
        }
        /*** an empty <a href="..."></a> has no first child to read ***/
        $first = $tag->firstChild;
        $ret[$tag->getAttribute('href')] = ($first !== null) ? $first->nodeValue : '';
    }

    return $ret;
}
/*** the page to scan; the form may be posted without a usable address ***/
$link = (isset($_POST['address']) && is_string($_POST['address'])) ? trim($_POST['address']) : '';
/*** everything echoed below is user- or page-controlled, so escape it ***/
$safeLink = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
$base = parse_url($link);

if (!is_array($base) || !isset($base['scheme'], $base['host'])
    || !in_array(strtolower($base['scheme']), array('http', 'https'), true)) {
    /*** the address is handed to file_get_contents(); refusing anything
         but http(s) keeps it from reading local files or other wrappers ***/
    echo 'Please enter a full http:// or https:// address';
} else {
    /*** get the links ***/
    $urls = getLinks($link);

    /*** check for results ***/
    if (count($urls) > 0) {
        /*** pieces of the scanned address used to complete relative links ***/
        $origin = $base['scheme'] . '://' . $base['host'] . (isset($base['port']) ? ':' . $base['port'] : '');
        $path   = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';
        $dir    = substr($path, 0, strrpos($path, '/') + 1);
        $page   = $origin . $path . (isset($base['query']) ? '?' . $base['query'] : '');

        foreach ($urls as $key => $value) {
            /*** purely numeric hrefs arrive as integer array keys ***/
            $key = trim((string) $key);
            $internal = true;

            if (preg_match('/^[a-z][a-z0-9+.-]*:/i', $key) || strncmp($key, '//', 2) === 0) {
                /*** absolute or protocol-relative: internal only when it is
                     an http(s) link pointing at the scanned host ***/
                $target = parse_url($key);
                $internal = is_array($target) && isset($target['host'])
                    && strcasecmp($target['host'], $base['host']) === 0
                    && (!isset($target['scheme'])
                        || in_array(strtolower($target['scheme']), array('http', 'https'), true));
                $absolute = (strncmp($key, '//', 2) === 0) ? $base['scheme'] . ':' . $key : $key;
            } elseif ($key === '' || $key[0] === '#') {
                /*** empty href or fragment: stays on the scanned page ***/
                $absolute = $page . $key;
            } elseif ($key[0] === '?') {
                /*** query-only link: same path, different query ***/
                $absolute = $origin . $path . $key;
            } elseif ($key[0] === '/') {
                /*** root-relative link ***/
                $absolute = $origin . $key;
            } else {
                /*** relative to the scanned page's directory
                     ("../" segments are shown as written, not collapsed) ***/
                $absolute = $origin . $dir . $key;
            }

            $shown = htmlspecialchars($absolute, ENT_QUOTES, 'UTF-8');
            if ($internal) {
                echo '<span style="color:BLUE;">' . $shown . ' - internal</span><br >';
            } else {
                echo '<span style="color:RED;">' . $shown . ' - external</span><br >';
            }
        }
    } else {
        echo "No links found at $safeLink";
    }
}
}
?>
<br /><br />
<form action="" method="post" enctype="multipart/form-data" name="link">
<input name="address" type="text" value="" />
<input name="Submit" type="Submit" />
</form>