angerguy
01-18-2010, 08:55 AM
hello members, im new to php and xml and i had a problem in php and i have been working on for a week and i coudnt find a solution. i want to grab a url and take all the links inside it with its title and insert it in a csv file or xml. finally i had a script that creates an rss feed for the website but no links is shown. any help is really appreciated.
<?php
$url = 'http://www.lebweb.com/dir/directory';
$title = 'lebweb links';
$description = 'Links';
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';
header('Content-type: text/xml; charset=utf-8', true);
echo '<?xml version="1.0" encoding="UTF-8"?'.'>' . "\n";
echo '<rss version="2.0">' . "\n";
echo '<channel>' . "\n";
echo ' <title>' . $title . '</title>' . "\n";
echo ' <link>' . $url . '</link>' . "\n";
echo ' <description>' . $description . '</description>' . "\n";
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt($curl, CURLOPT_TIMEOUT, 20 );
$html = curl_exec( $curl );
$html = @mb_convert_encoding($html, 'HTML-ENTITIES', 'utf-8');
curl_close( $curl );
$dom = new DOMDocument();
$dom->loadHTML($html);
$tags = $dom->getElementsByTagName('a');
$file = fopen ('acontents.csv',w);
foreach ($tags as $tag) {
echo $tag->getAttribute('href').' | '.$tag->nodeValue."\n<br>";
fwrite ($tag->getAttribute('href').' | '.$tag->nodeValue."\n<br>");
}
fclose($file);
echo '</channel></rss>';
?>
<?php
$url = 'http://www.lebweb.com/dir/directory';
$title = 'lebweb links';
$description = 'Links';
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';
header('Content-type: text/xml; charset=utf-8', true);
echo '<?xml version="1.0" encoding="UTF-8"?'.'>' . "\n";
echo '<rss version="2.0">' . "\n";
echo '<channel>' . "\n";
echo ' <title>' . $title . '</title>' . "\n";
echo ' <link>' . $url . '</link>' . "\n";
echo ' <description>' . $description . '</description>' . "\n";
$curl = curl_init($url);
curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt($curl, CURLOPT_TIMEOUT, 20 );
$html = curl_exec( $curl );
$html = @mb_convert_encoding($html, 'HTML-ENTITIES', 'utf-8');
curl_close( $curl );
$dom = new DOMDocument();
$dom->loadHTML($html);
$tags = $dom->getElementsByTagName('a');
$file = fopen ('acontents.csv',w);
foreach ($tags as $tag) {
echo $tag->getAttribute('href').' | '.$tag->nodeValue."\n<br>";
fwrite ($tag->getAttribute('href').' | '.$tag->nodeValue."\n<br>");
}
fclose($file);
echo '</channel></rss>';
?>