I want the function in red to be a loop instead of a recursive function so that I can cut down on memory use and avoid the possible error of the stack filling up. My main problem is that a JavaScript for loop won't wait for an Ajax request to complete before it continues executing code, and that is what has kept me from writing a successful loop function. Could one of you brilliant minds out there give me a hand?
Last edited by mberkom; 04-17-2009 at 06:50 PM..
Reason: misspelling
which would be faster due to the simultaneous requests, but the order wouldn't be guaranteed. Also, since I don't use jQuery, I'm not sure how well it handles simultaneous requests.
For some reason, after about 25 entries have been put on the page I still get the error "script stack space quota is exhausted". I would like to figure out why that error occurs, and whether there is a solution.
Try this. By putting more of the script into the global scope, and by using setTimeout to 'escape' from your current execution scope (allowing that old scope to be garbage collected), it should handle more/bigger files.
Code:
<script type="text/javascript" charset="utf-8">
/*******************************************************
*Data Scraping Script *
*******************************************************/
//Global Vars
//------------------------------------------------------
var session_id = <?=$session->id?>;
//selectors
//each value is emitted with json_encode() so that a quote or backslash inside a
//selector (for example a[rel='next']) cannot terminate the JavaScript string early;
//the (string) cast keeps a missing selector as "" instead of null
var new_entry_sel = <?=json_encode((string)$session->new_entry_selector)?>;
var state_sel = <?=json_encode((string)$session->state_selector)?>;
var person_name_sel = <?=json_encode((string)$session->person_name_selector)?>;
var company_name_sel = <?=json_encode((string)$session->company_name_selector)?>;
var email_sel = <?=json_encode((string)$session->email_selector)?>;
var phone_sel = <?=json_encode((string)$session->phone_selector)?>;
var address_sel = <?=json_encode((string)$session->address_selector)?>;
var link_sel = <?=json_encode((string)$session->link_selector)?>;
var custom_sel = <?=json_encode((string)$session->custom_selector)?>;
//array of links to be searched
//urls are json_encode()d for the same reason as the selectors above
//NOTE(review): the final '' presumably exists only to absorb the trailing comma
//written by the loop - confirm before removing it
var links = [
<?php foreach($links->all as $l):?>
<?=json_encode(trim($l->url))?>,
<?php endforeach;?>
''
];
//work through links one request at a time, parse data, print out onto page
/**
 * Requests the page for the first url in the global `links` array, appends one
 * table row per matched entry to #data, removes that url from `links` and then
 * schedules itself for the next one. Shows the 'Finished' alert once no urls
 * are left.
 *
 * Reads the global selector variables (new_entry_sel, state_sel, ...); a field
 * whose selector is blank, or that is not present in the page, is written as an
 * empty cell.
 */
function parse_data(){
    //blank urls (such as the '' that ends the links array) are dropped here so
    //that no request is ever made for an empty url
    while (links.length && !links[0]) {
        links.shift();
    }
    if (!links.length) {
        alert('Finished');
        return;
    }

    //null/undefined would otherwise be concatenated as the text "null"/"undefined"
    function cell(value){
        return value == null ? '' : value;
    }

    //html of the first element matching sel inside scope, or '' when sel is blank
    function field_html(scope, sel){
        return sel !== '' ? cell(scope.find(sel).html()) : '';
    }

    $.post('/scraper/bot/get_page', { url: links[0] }, function(data){
        var state = state_sel !== '' ? cell($(state_sel, data).html()) : '';
        //query the returned page once instead of once per field per row
        var entries = $(new_entry_sel, data);
        for (var s = 0; s < entries.length; s++) {
            var entry = entries.eq(s);
            var link = link_sel !== '' ? cell(entry.find(link_sel).attr('href')) : '';
            //data gathered, in column order
            var cells = [
                state,
                field_html(entry, company_name_sel),
                field_html(entry, person_name_sel),
                field_html(entry, email_sel),
                field_html(entry, phone_sel),
                field_html(entry, address_sel),
                link,
                field_html(entry, custom_sel)
            ];
            //insert data
            //NOTE(review): the values come from the fetched page and are inserted as raw html
            $('#data').append('<tr><td>' + cells.join('</td><td>') + '</td></tr>');
        }
        links.shift();
        //setTimeout starts the next step from a fresh call stack instead of
        //nesting it inside this callback
        setTimeout(parse_data, 1);
    });
}
//kick off the first request once the DOM is ready
$(function() {
    parse_data();
});
</script>