markman641
01-22-2012, 04:49 AM
Hello! I have a simplexml and $xml->xpath script that is ALMOST complete. I have a script that will log into a website and extract info from a page that I put into it. It works perfectly, except I need it to extract just one more thing.
This is the code:
<?php
// Logs into proleadsmedia.com with cURL, downloads one campaign page, logs out,
// then prints selected cells of the campaign-details table as HTML paragraphs.
//
// NOTE(review): the credentials below are hard-coded placeholders; load real
// ones from configuration rather than committing them to the script.

// INIT CURL
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/login.php?next');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, 'username=*************&password=************');
curl_setopt($ch, CURLOPT_COOKIEJAR, './cookie.txt');
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// EXECUTE 1st REQUEST (FORM LOGIN)
$store = curl_exec($ch);
if ($store === false) {
    // Without a session the remaining requests are pointless, so stop here.
    $error = curl_error($ch);
    curl_close($ch);
    exit('<p>Login request failed: ' . htmlspecialchars($error, ENT_QUOTES, 'UTF-8') . "</p>\n");
}

// SET FILE TO DOWNLOAD
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/campaigns/view.php?wid=592&cid=4811');
// EXECUTE 2nd REQUEST (FILE DOWNLOAD)
$content = curl_exec($ch);
// Capture the error text now; the logout request below would overwrite it.
$downloadError = ($content === false) ? curl_error($ch) : '';

// LOG BACK OUT (attempted even when the download failed, so the session is not left open)
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/logout.php');
$logged_out = curl_exec($ch);
// CLOSE CURL
curl_close($ch);

if ($content === false) {
    exit('<p>Campaign request failed: ' . htmlspecialchars($downloadError, ENT_QUOTES, 'UTF-8') . "</p>\n");
}

// Cut out the rows of the details table: everything from its first cell (the
// "ID" label) up to the first closing </table> after it.
$marker = '<td width="30%" align="right"><b>ID</b></td>';
$end = '</table>';
$markerPos = strpos($content, $marker);
if ($markerPos === false) {
    // Typically means the login did not succeed or the page layout changed.
    exit("<p>Campaign details table not found in the downloaded page.</p>\n");
}
$fragment = substr($content, $markerPos);
$endPos = strpos($fragment, $end);
if ($endPos !== false) {
    $fragment = substr($fragment, 0, $endPos);
}
// Re-wrap the rows so the parser sees one well-formed table.
$fragment = '<table><tr>' . $fragment . $end;

// The scraped markup is not guaranteed to be valid HTML; collect parser
// warnings internally instead of emitting them, then restore the old setting.
$previousErrorMode = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($fragment);
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);

$xpath = new DOMXPath($dom);
$cells = $xpath->query('//td');

// Position of each value cell among all <td> elements of the fragment.
// Cells alternate label/value, so the values sit at the odd indexes.
$fields = array(
    'Campaign Name' => 5,
    'Description'   => 9,
    'Requirements'  => 11,
    'Category'      => 13,
    'Country'       => 15,
    'Rate'          => 17,
);

foreach ($fields as $label => $index) {
    $cell = $cells->item($index);
    // textContent also includes text inside nested tags (e.g. a bold marker
    // within the campaign name). A missing cell is printed as an empty value.
    $text = ($cell === null) ? '' : $cell->textContent;
    // Turn non-breaking spaces into plain spaces and collapse whitespace runs.
    $text = trim(preg_replace('/\s+/', ' ', str_replace("\xC2\xA0", ' ', $text)));
    // The text comes from a remote page, so escape it before echoing it as HTML.
    print '<p>' . $label . ': ' . htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</p>\n";
}
?>
Now I need it to extract just one more thing. Here is the source of the page it extracts the info from:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<!-- This code is the copyrighted property of Seraph-Chan Designs, LLC. Unauthorized use strictly prohibited-->
<head>
<!-- -->
<!-- -->
<link href="http://proleadsmedia.com/templates/10/styles.css" rel="stylesheet" type="text/css" />
<!-- -->
<script type="text/javascript" src="http://proleadsmedia.com/templates/10/javascript.js"></script>
<script type="text/javascript" src="http://proleadsmedia.com/templates/10/tabber.js"></script>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta content="IE=EmulateIE8" />
<title>ProLeadsMedia - Publishers - Campaign - View</title>
</head>
<body id="member_body">
<div id="everything">
<div id="header">
<div id="logo"><a href="/home"><img src="http://proleadsmedia.com/templates/10/logo.jpg" height="110" width="343" alt="Pro Leads Media" /></a></div>
<div id="navigation" class="members_navigation" align="center">
<!-- -->
<table cellpadding="2" cellspacing="0" style="width:100%;text-align:center;">
<tr class="td">
<td width="25%"><a href="http://proleadsmedia.com/publishers/home"><b>Home</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/edit.php"><b>Edit Account</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/websites/"><b>Websites</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/campaigns/index.php"><b>Campaigns</b></a></td>
</tr>
<tr class="td">
<td width="25%"><a href="http://proleadsmedia.com/publishers/balance.php"><b>Balance</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/reports.php"><b>Reporting</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/services.php"><b>Services</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/logout.php"><b>Logout</b></a></td>
</tr>
</table>
<br />
<!-- -->
</div>
</div>
<!-- -->
<div id="mainbody_wrapper">
<div id="mainbody">
<!-- --><div>
<table cellpadding="0" cellspacing="0" border="0">
<tr>
<td><div><b>Viewing Campaign #4811 for Money4Every1</b></div></td>
</tr>
</table>
</div><br />
<ul id="tbr_camp">
<li id="main"><a href="http://proleadsmedia.com/publishers/campaigns/view.php?wid=592&cid=4811">Main</a></li>
<li id="pixel"><a href="http://proleadsmedia.com/publishers/campaigns/pixel.php?wid=592&cid=4811">Pixels</a></li>
<li id="postback"><a href="http://proleadsmedia.com/publishers/postback.php?wid=592&cid=4811">PostBack</a></li>
<li id="report"><a href="http://proleadsmedia.com/publishers/campaigns/report.php?wid=592&cid=4811">Report</a></li>
</ul>
<script style="text/javascript">
<!--
var tabber = new Tabber('tbr_camp');
tabber.pressed = 'main';
tabber.url = true;
tabber.build();
-->
</script><table cellspacing="0" cellpadding="0" style="border:0px;width:100%">
<tr valign="top">
<td style="width:50%">
<table cellspacing="0" cellpadding="4" style="border:0px;width:100%">
<tr>
<td width="30%" align="right"><b>ID</b></td>
<td width="70%">4811</td>
</tr>
<tr>
<td align="right"><b>Type</b></td>
<td>Cost Per Lead (CPL)</td>
</tr>
<tr valign="top">
<td align="right"><b>Campaign Name</b></td>
<td>
<div>$1MM Easter Egg Sweepstakes <b>*Incent*</b>
</div>
</td>
</tr>
<tr>
<td align="right"><b>Campaign URL</b></td>
<td><a href="http://proleadsmedia.com/m/1ab8250tf3112cbs403/test" target="_blank" class="link">Preview Landing Page</a></td>
</tr>
<tr valign="top">
<td align="right"><b>Description</b></td>
<td>Enter email on 1st page & popup(s).</td>
</tr>
<tr valign="top">
<td align="right"><b>Requirements</b></td>
<td>Enter email on 1st page & popup(s).</td>
</tr>
<tr>
<td align="right"><b>Category</b></td>
<td>1ClickFamily</td>
</tr>
<tr>
<td align="right"><b>Country</b></td>
<td>United States</td>
</tr>
<tr>
<td align="right"><b>Rate</b></td>
<td> $0.40
</td>
<td></td>
</tr>
</table>
</td>
<td style="width:50%">
<table cellspacing="0" cellpadding="4" style="border:0px;width:100%">
<tr>
<td width="30%" align="right"><b>Earned Today</b></td>
<td width="70%">$0.00</td>
</tr>
<tr>
<td align="right"><b>Earned Total</b></td>
<td>$0.00</td>
</tr>
<tr>
<td align="right"><b>Leads</b></td>
<td>0</td>
</tr>
<tr>
<td align="right"><b>Clicks</b></td>
<td>1</td>
</tr>
<tr>
<td align="right"><b>Last Credit</b></td>
<td>n/a</td>
</tr>
</table>
</td>
</tr>
</table>
<br />
<a name="creatives"></a>
<ul id="tabberCamps">
<li id="textAds">Text Ads (1)</li>
<li id="bannerAds">Banner Ads (0)</li>
<li id="emailAds">E-mail Ads (0)</li>
</ul>
<div style="margin-top:10px;padding:4px" class="table">
<div><b>Your Tracking Info</b> - In order to add YTIs onto your campaign you must add it at the very end. You can have up to 5 YTIs separated by a backslash /.</div><br />
<div>For example if you wanted to pass the YTIs "John" and "1006" then the URL would look like:<br />
http://proleadsmedia.com/m/1ab8250tf3112cbs403/John/1006
</div>
<div style="margin-top:10px;padding:4px" class="table">
<div><b>Default URL</b> - </div><br />
http://proleadsmedia.com/m/1ab8250tf3112cbs403/
</div>
<div id="tabberCamps_textAds">
<div style="margin-top:10px;padding:4px" class="table">
<div style="overflow:auto;max-height:100px">$1MM Easter Egg Sweepstakes</div>
<br />
<table cellpadding="2" cellspacing="0" style="border:0px;">
<tr>
<td style="width:50px" align="right"><b>URL</b></td>
<td style="width:540px"><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table">http://proleadsmedia.com/m/1ab8250tf3112cbs403/</div></td>
</tr>
<tr>
<td align="right"><b>HTML</b></td>
<td><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table"><a href="http://proleadsmedia.com/m/1ab8250tf3112cbs403/">$1MM Easter Egg Sweepstakes</a></div></td>
</tr>
</table>
</div>
</div>
<div id="tabberCamps_bannerAds">
<br />
<div>This campaign has no banner ads.</div>
</div>
<div id="tabberCamps_emailAds">
<br />
<div>This campaign has no e-mail ads.</div>
</div>
<script style="text/javascript">
<!--
var tabber = new Tabber('tabberCamps');
tabber.anchorID = 'creatives';
tabber.pressed = 'textAds';
tabber.build();
-->
</script>
<!-- -->
</div>
</div>
<!-- -->
</div>
<!-- -->
<div id="footer">
<a href="/home">© <!----> 2011 - 2012 <!----> ProLeadsMedia </a> <br>
<a href="http://proleadsmedia.com/privacy" target="_blank" >Privacy Policy</a> <br> <a href="http://proleadsmedia.com/networks" target="_blank" >Find Networks</a>
</div>
</body>
</html>
Now the thing I need it to extract is the URL. The thing I need extracted is in red. Here is the line that it is on: <td style="width:540px"><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table">http://proleadsmedia.com/m/1ab8250tf3112cbs403/</div></td>
If you need any more info just ask. Thank you VERY much if you do this!
This is the code:
<?php
// Logs into proleadsmedia.com with cURL, downloads one campaign page, logs out,
// then prints selected cells of the campaign-details table as HTML paragraphs.
//
// NOTE(review): the credentials below are hard-coded placeholders; load real
// ones from configuration rather than committing them to the script.

// INIT CURL
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/login.php?next');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, 'username=*************&password=************');
curl_setopt($ch, CURLOPT_COOKIEJAR, './cookie.txt');
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// EXECUTE 1st REQUEST (FORM LOGIN)
$store = curl_exec($ch);
if ($store === false) {
    // Without a session the remaining requests are pointless, so stop here.
    $error = curl_error($ch);
    curl_close($ch);
    exit('<p>Login request failed: ' . htmlspecialchars($error, ENT_QUOTES, 'UTF-8') . "</p>\n");
}

// SET FILE TO DOWNLOAD
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/campaigns/view.php?wid=592&cid=4811');
// EXECUTE 2nd REQUEST (FILE DOWNLOAD)
$content = curl_exec($ch);
// Capture the error text now; the logout request below would overwrite it.
$downloadError = ($content === false) ? curl_error($ch) : '';

// LOG BACK OUT (attempted even when the download failed, so the session is not left open)
curl_setopt($ch, CURLOPT_URL, 'http://proleadsmedia.com/publishers/logout.php');
$logged_out = curl_exec($ch);
// CLOSE CURL
curl_close($ch);

if ($content === false) {
    exit('<p>Campaign request failed: ' . htmlspecialchars($downloadError, ENT_QUOTES, 'UTF-8') . "</p>\n");
}

// Cut out the rows of the details table: everything from its first cell (the
// "ID" label) up to the first closing </table> after it.
$marker = '<td width="30%" align="right"><b>ID</b></td>';
$end = '</table>';
$markerPos = strpos($content, $marker);
if ($markerPos === false) {
    // Typically means the login did not succeed or the page layout changed.
    exit("<p>Campaign details table not found in the downloaded page.</p>\n");
}
$fragment = substr($content, $markerPos);
$endPos = strpos($fragment, $end);
if ($endPos !== false) {
    $fragment = substr($fragment, 0, $endPos);
}
// Re-wrap the rows so the parser sees one well-formed table.
$fragment = '<table><tr>' . $fragment . $end;

// The scraped markup is not guaranteed to be valid HTML; collect parser
// warnings internally instead of emitting them, then restore the old setting.
$previousErrorMode = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($fragment);
libxml_clear_errors();
libxml_use_internal_errors($previousErrorMode);

$xpath = new DOMXPath($dom);
$cells = $xpath->query('//td');

// Position of each value cell among all <td> elements of the fragment.
// Cells alternate label/value, so the values sit at the odd indexes.
$fields = array(
    'Campaign Name' => 5,
    'Description'   => 9,
    'Requirements'  => 11,
    'Category'      => 13,
    'Country'       => 15,
    'Rate'          => 17,
);

foreach ($fields as $label => $index) {
    $cell = $cells->item($index);
    // textContent also includes text inside nested tags (e.g. a bold marker
    // within the campaign name). A missing cell is printed as an empty value.
    $text = ($cell === null) ? '' : $cell->textContent;
    // Turn non-breaking spaces into plain spaces and collapse whitespace runs.
    $text = trim(preg_replace('/\s+/', ' ', str_replace("\xC2\xA0", ' ', $text)));
    // The text comes from a remote page, so escape it before echoing it as HTML.
    print '<p>' . $label . ': ' . htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</p>\n";
}
?>
Now I need it to extract just one more thing. Here is the source of the page it extracts the info from:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<!-- This code is the copyrighted property of Seraph-Chan Designs, LLC. Unauthorized use strictly prohibited-->
<head>
<!-- -->
<!-- -->
<link href="http://proleadsmedia.com/templates/10/styles.css" rel="stylesheet" type="text/css" />
<!-- -->
<script type="text/javascript" src="http://proleadsmedia.com/templates/10/javascript.js"></script>
<script type="text/javascript" src="http://proleadsmedia.com/templates/10/tabber.js"></script>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta content="IE=EmulateIE8" />
<title>ProLeadsMedia - Publishers - Campaign - View</title>
</head>
<body id="member_body">
<div id="everything">
<div id="header">
<div id="logo"><a href="/home"><img src="http://proleadsmedia.com/templates/10/logo.jpg" height="110" width="343" alt="Pro Leads Media" /></a></div>
<div id="navigation" class="members_navigation" align="center">
<!-- -->
<table cellpadding="2" cellspacing="0" style="width:100%;text-align:center;">
<tr class="td">
<td width="25%"><a href="http://proleadsmedia.com/publishers/home"><b>Home</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/edit.php"><b>Edit Account</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/websites/"><b>Websites</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/campaigns/index.php"><b>Campaigns</b></a></td>
</tr>
<tr class="td">
<td width="25%"><a href="http://proleadsmedia.com/publishers/balance.php"><b>Balance</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/reports.php"><b>Reporting</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/services.php"><b>Services</b></a></td>
<td width="25%"><a href="http://proleadsmedia.com/publishers/logout.php"><b>Logout</b></a></td>
</tr>
</table>
<br />
<!-- -->
</div>
</div>
<!-- -->
<div id="mainbody_wrapper">
<div id="mainbody">
<!-- --><div>
<table cellpadding="0" cellspacing="0" border="0">
<tr>
<td><div><b>Viewing Campaign #4811 for Money4Every1</b></div></td>
</tr>
</table>
</div><br />
<ul id="tbr_camp">
<li id="main"><a href="http://proleadsmedia.com/publishers/campaigns/view.php?wid=592&cid=4811">Main</a></li>
<li id="pixel"><a href="http://proleadsmedia.com/publishers/campaigns/pixel.php?wid=592&cid=4811">Pixels</a></li>
<li id="postback"><a href="http://proleadsmedia.com/publishers/postback.php?wid=592&cid=4811">PostBack</a></li>
<li id="report"><a href="http://proleadsmedia.com/publishers/campaigns/report.php?wid=592&cid=4811">Report</a></li>
</ul>
<script style="text/javascript">
<!--
var tabber = new Tabber('tbr_camp');
tabber.pressed = 'main';
tabber.url = true;
tabber.build();
-->
</script><table cellspacing="0" cellpadding="0" style="border:0px;width:100%">
<tr valign="top">
<td style="width:50%">
<table cellspacing="0" cellpadding="4" style="border:0px;width:100%">
<tr>
<td width="30%" align="right"><b>ID</b></td>
<td width="70%">4811</td>
</tr>
<tr>
<td align="right"><b>Type</b></td>
<td>Cost Per Lead (CPL)</td>
</tr>
<tr valign="top">
<td align="right"><b>Campaign Name</b></td>
<td>
<div>$1MM Easter Egg Sweepstakes <b>*Incent*</b>
</div>
</td>
</tr>
<tr>
<td align="right"><b>Campaign URL</b></td>
<td><a href="http://proleadsmedia.com/m/1ab8250tf3112cbs403/test" target="_blank" class="link">Preview Landing Page</a></td>
</tr>
<tr valign="top">
<td align="right"><b>Description</b></td>
<td>Enter email on 1st page & popup(s).</td>
</tr>
<tr valign="top">
<td align="right"><b>Requirements</b></td>
<td>Enter email on 1st page & popup(s).</td>
</tr>
<tr>
<td align="right"><b>Category</b></td>
<td>1ClickFamily</td>
</tr>
<tr>
<td align="right"><b>Country</b></td>
<td>United States</td>
</tr>
<tr>
<td align="right"><b>Rate</b></td>
<td> $0.40
</td>
<td></td>
</tr>
</table>
</td>
<td style="width:50%">
<table cellspacing="0" cellpadding="4" style="border:0px;width:100%">
<tr>
<td width="30%" align="right"><b>Earned Today</b></td>
<td width="70%">$0.00</td>
</tr>
<tr>
<td align="right"><b>Earned Total</b></td>
<td>$0.00</td>
</tr>
<tr>
<td align="right"><b>Leads</b></td>
<td>0</td>
</tr>
<tr>
<td align="right"><b>Clicks</b></td>
<td>1</td>
</tr>
<tr>
<td align="right"><b>Last Credit</b></td>
<td>n/a</td>
</tr>
</table>
</td>
</tr>
</table>
<br />
<a name="creatives"></a>
<ul id="tabberCamps">
<li id="textAds">Text Ads (1)</li>
<li id="bannerAds">Banner Ads (0)</li>
<li id="emailAds">E-mail Ads (0)</li>
</ul>
<div style="margin-top:10px;padding:4px" class="table">
<div><b>Your Tracking Info</b> - In order to add YTIs onto your campaign you must add it at the very end. You can have up to 5 YTIs separated by a backslash /.</div><br />
<div>For example if you wanted to pass the YTIs "John" and "1006" then the URL would look like:<br />
http://proleadsmedia.com/m/1ab8250tf3112cbs403/John/1006
</div>
<div style="margin-top:10px;padding:4px" class="table">
<div><b>Default URL</b> - </div><br />
http://proleadsmedia.com/m/1ab8250tf3112cbs403/
</div>
<div id="tabberCamps_textAds">
<div style="margin-top:10px;padding:4px" class="table">
<div style="overflow:auto;max-height:100px">$1MM Easter Egg Sweepstakes</div>
<br />
<table cellpadding="2" cellspacing="0" style="border:0px;">
<tr>
<td style="width:50px" align="right"><b>URL</b></td>
<td style="width:540px"><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table">http://proleadsmedia.com/m/1ab8250tf3112cbs403/</div></td>
</tr>
<tr>
<td align="right"><b>HTML</b></td>
<td><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table"><a href="http://proleadsmedia.com/m/1ab8250tf3112cbs403/">$1MM Easter Egg Sweepstakes</a></div></td>
</tr>
</table>
</div>
</div>
<div id="tabberCamps_bannerAds">
<br />
<div>This campaign has no banner ads.</div>
</div>
<div id="tabberCamps_emailAds">
<br />
<div>This campaign has no e-mail ads.</div>
</div>
<script style="text/javascript">
<!--
var tabber = new Tabber('tabberCamps');
tabber.anchorID = 'creatives';
tabber.pressed = 'textAds';
tabber.build();
-->
</script>
<!-- -->
</div>
</div>
<!-- -->
</div>
<!-- -->
<div id="footer">
<a href="/home">© <!----> 2011 - 2012 <!----> ProLeadsMedia </a> <br>
<a href="http://proleadsmedia.com/privacy" target="_blank" >Privacy Policy</a> <br> <a href="http://proleadsmedia.com/networks" target="_blank" >Find Networks</a>
</div>
</body>
</html>
Now the thing I need it to extract is the URL. The thing I need extracted is in red. Here is the line that it is on: <td style="width:540px"><div style="overflow:auto;max-height:100px;padding:2px" onclick="autoSelect(this);" class="table">http://proleadsmedia.com/m/1ab8250tf3112cbs403/</div></td>
If you need any more info just ask. Thank you VERY much if you do this!