oxnume
01-21-2011, 09:58 PM
I wrote a script which scrapes a list of urls for data using curl_multi and then calls other functions to process the data. It works but only for a certain number of links after which memory gets all taken up and I get this error:
Fatal error: Allowed memory size of 134217728 bytes exhausted (tried to allocate 172694 bytes) in D:\www\updatefunc.php on line 236
Below is the function containing the problem. I tried unsetting a couple of arrays and pointing them to NULL and even manually calling gc cycles but nothing seems to work :(
function updateListMulti($mysql_result, $acc, $limit = 99999){
$rand = rand();
$ready = 0;
$location = getLocation($acc);
$switch = getSwitch();
for ($count1 = 0; ($count1 < ceil(mysql_num_rows($mysql_result)/MAX_CONNECTIONS)) && ($ready < $limit) && $switch; $count1++){
$mh = curl_multi_init();
unset($row);
$row = null;
unset($ch);
$ch = null;
gc_collect_cycles();
for ($count2 = 0; ($count2 < MAX_CONNECTIONS) && ($row[$count2] = mysql_fetch_array($mysql_result)) && $switch; $count2++){
$ch[$count2] = curl_init();
curl_setopt($ch[$count2], CURLOPT_USERAGENT, MY_USERAGENT);
curl_setopt($ch[$count2], CURLOPT_COOKIEFILE , COOKIES_PATH."$acc.txt");
curl_setopt($ch[$count2], CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch[$count2], CURLOPT_URL,"http://example.com/page.php?target=".$row[$count2]['id']);
//add the handle to multi
curl_multi_add_handle($mh, $ch[$count2]);
}
// currently working method
do {
$status = curl_multi_exec($mh, $active); // <========= THIS IS LINE 236
$info = curl_multi_info_read($mh);
if (false !== $info) {
//var_dump($info);
}
} while ($status === CURLM_CALL_MULTI_PERFORM || $active);
foreach ($ch as $i => $url) {
$html = curl_multi_getcontent($ch[$i]);
if($info = updateOneHtml($row[$i]['id'], $location, $html))
if (updateOneDB($info, $rand))
$ready++;
}
curl_multi_close($mh);
// another method which doesn't work
// (removed for clarity)
//
$switch = getSwitch();
}
}
Please tell me if you see anything I'm doing wrong with this code.
Fatal error: Allowed memory size of 134217728 bytes exhausted (tried to allocate 172694 bytes) in D:\www\updatefunc.php on line 236
Below is the function containing the problem. I tried unsetting a couple of arrays and pointing them to NULL and even manually calling gc cycles but nothing seems to work :(
function updateListMulti($mysql_result, $acc, $limit = 99999){
$rand = rand();
$ready = 0;
$location = getLocation($acc);
$switch = getSwitch();
for ($count1 = 0; ($count1 < ceil(mysql_num_rows($mysql_result)/MAX_CONNECTIONS)) && ($ready < $limit) && $switch; $count1++){
$mh = curl_multi_init();
unset($row);
$row = null;
unset($ch);
$ch = null;
gc_collect_cycles();
for ($count2 = 0; ($count2 < MAX_CONNECTIONS) && ($row[$count2] = mysql_fetch_array($mysql_result)) && $switch; $count2++){
$ch[$count2] = curl_init();
curl_setopt($ch[$count2], CURLOPT_USERAGENT, MY_USERAGENT);
curl_setopt($ch[$count2], CURLOPT_COOKIEFILE , COOKIES_PATH."$acc.txt");
curl_setopt($ch[$count2], CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch[$count2], CURLOPT_URL,"http://example.com/page.php?target=".$row[$count2]['id']);
//add the handle to multi
curl_multi_add_handle($mh, $ch[$count2]);
}
// currently working method
do {
$status = curl_multi_exec($mh, $active); // <========= THIS IS LINE 236
$info = curl_multi_info_read($mh);
if (false !== $info) {
//var_dump($info);
}
} while ($status === CURLM_CALL_MULTI_PERFORM || $active);
foreach ($ch as $i => $url) {
$html = curl_multi_getcontent($ch[$i]);
if($info = updateOneHtml($row[$i]['id'], $location, $html))
if (updateOneDB($info, $rand))
$ready++;
}
curl_multi_close($mh);
// another method which doesn't work
// (removed for clarity)
//
$switch = getSwitch();
}
}
Please tell me if you see anything I'm doing wrong with this code.