ladson4@comcast
01-17-2009, 10:52 PM
I wrote the following JavaScript to validate the hyperlinks contained within MS Word documents. It worked fine while testing a folder containing about a dozen test documents, but failed with an "insufficient memory" error when executed on a folder containing about 300 documents (with 5000 hyperlinks).
The problem relates to the following commands, which attempt to pinpoint the page/line number at which the hyperlink resides. The script works fine when these lines are removed.
line_no = aLink.Range.Information(GwdFirstCharacterLineNumber); // consumes too much memory
page_no = aLink.Range.Information(GwdActiveEndPageNumber); // consumes too much memory
I would be interested in knowing how to provide this functionality (for user friendliness) without overflowing memory.
//================================
// Link-check one Word document.
//
// Opens the document `fn` read-only through the global Word application
// object `Wrd`, writes a header line plus one comma-separated line per
// hyperlink to the global log `logf`, and adds to the global counters
// `totalFiles` and `totalHyperlinks`.
//
// Each hyperlink is reported as "ok" or with one of these findings:
//   "Local Drive: ..."   - drive name starts with "FILE"
//   "Network Drive: ..." - drive name starts with "S"
//   "Bad Drive: ..."     - any other non-empty drive name
//   "HTTP (nnn)"         - http/https link answered with a status other than 200
//   "Unreachable"        - the HTTP request itself threw
//
// Relies on globals created by the calling script: Wrd, Fso, xml, logf,
// totalFiles, totalHyperlinks.
function linkcheck(fn)
{
    // Kept as script-level globals, exactly as the original assigned them.
    GconfirmConversions = false;
    GreadOnly = true;
    GaddToRecentFiles = false;
    GwdFirstCharacterLineNumber = 10;
    GwdActiveEndPageNumber = 3;

    var d = Wrd.Documents.Open(fn, GconfirmConversions, GreadOnly, GaddToRecentFiles);
    try {
        d.Activate();

        var c = d.Hyperlinks.Count;
        logf.WriteLine("------>" + fn + ",,," + "hyperlinks=" + c);
        totalFiles = totalFiles + 1;
        totalHyperlinks = totalHyperlinks + c;

        // List out hyperlinks
        var e = new Enumerator(d.Hyperlinks);
        for ( ; !e.atEnd(); e.moveNext())
        {
            var aLink = e.item();
            var link = aLink.Address;
            var error = "ok";

            // Page/line lookup stays disabled: the poster reports that calling
            // aLink.Range.Information(GwdFirstCharacterLineNumber) and
            // aLink.Range.Information(GwdActiveEndPageNumber) here exhausts
            // memory on large batches, so placeholders are logged instead.
            var line_no = "???";
            var page_no = "???";

            // Look at just the drive name portion.
            // It may be a drive letter or even a UNC \\server\folder.
            // Most specific match first; the outcome is the same as the
            // original's three overriding ifs.
            var dn = Fso.GetDriveName(link);
            if (dn.slice(0,4) == "FILE") {
                error = "Local Drive: " + dn;
            }
            else if (dn.slice(0,1) == "S") {
                error = "Network Drive: " + dn;
            }
            else if (dn != "") {
                error = "Bad Drive: " + dn;
            }

            // If it's an HTTP(S) URL, see if it returns 200 (OK) when opened.
            // The scheme is matched case-insensitively and includes https.
            if (/^https?:/i.test(link)) {
                try {
                    xml.Open("GET", link, false);
                    xml.Send(null);
                    var xs = xml.status;
                    if (xs != 200) {
                        error = "HTTP (" + xs + ")";
                    }
                }
                catch (ex) {
                    error = "Unreachable";
                }
                // The shared `xml` object is deliberately NOT set to null here:
                // doing so made every later HTTP link throw on xml.Open and be
                // logged as "Unreachable".
            }

            logf.WriteLine(fn + "," + link + "," + error + "," + "page=" + page_no + "/ line=" + line_no);
        }//for
    }
    finally {
        // Always close the document, even if a hyperlink could not be read,
        // so failed runs do not leave documents open in Word.
        d.Close(false);
    }
}//function
//================================
BTW, I run this script using the "wscript" tool under Windows. A main program delivers documents one at a time to this function. Again, the script works fine with a few dozen documents, but not with hundreds.
The problem relates to the following commands, which attempt to pinpoint the page/line number at which the hyperlink resides. The script works fine when these lines are removed.
line_no = aLink.Range.Information(GwdFirstCharacterLineNumber); // consumes too much memory
page_no = aLink.Range.Information(GwdActiveEndPageNumber); // consumes too much memory
I would be interested in knowing how to provide this functionality (for user friendliness) without overflowing memory.
//================================
// Link-check one Word document.
//
// Opens the document `fn` read-only through the global Word application
// object `Wrd`, writes a header line plus one comma-separated line per
// hyperlink to the global log `logf`, and adds to the global counters
// `totalFiles` and `totalHyperlinks`.
//
// Each hyperlink is reported as "ok" or with one of these findings:
//   "Local Drive: ..."   - drive name starts with "FILE"
//   "Network Drive: ..." - drive name starts with "S"
//   "Bad Drive: ..."     - any other non-empty drive name
//   "HTTP (nnn)"         - http/https link answered with a status other than 200
//   "Unreachable"        - the HTTP request itself threw
//
// Relies on globals created by the calling script: Wrd, Fso, xml, logf,
// totalFiles, totalHyperlinks.
function linkcheck(fn)
{
    // Kept as script-level globals, exactly as the original assigned them.
    GconfirmConversions = false;
    GreadOnly = true;
    GaddToRecentFiles = false;
    GwdFirstCharacterLineNumber = 10;
    GwdActiveEndPageNumber = 3;

    var d = Wrd.Documents.Open(fn, GconfirmConversions, GreadOnly, GaddToRecentFiles);
    try {
        d.Activate();

        var c = d.Hyperlinks.Count;
        logf.WriteLine("------>" + fn + ",,," + "hyperlinks=" + c);
        totalFiles = totalFiles + 1;
        totalHyperlinks = totalHyperlinks + c;

        // List out hyperlinks
        var e = new Enumerator(d.Hyperlinks);
        for ( ; !e.atEnd(); e.moveNext())
        {
            var aLink = e.item();
            var link = aLink.Address;
            var error = "ok";

            // Page/line lookup stays disabled: the poster reports that calling
            // aLink.Range.Information(GwdFirstCharacterLineNumber) and
            // aLink.Range.Information(GwdActiveEndPageNumber) here exhausts
            // memory on large batches, so placeholders are logged instead.
            var line_no = "???";
            var page_no = "???";

            // Look at just the drive name portion.
            // It may be a drive letter or even a UNC \\server\folder.
            // Most specific match first; the outcome is the same as the
            // original's three overriding ifs.
            var dn = Fso.GetDriveName(link);
            if (dn.slice(0,4) == "FILE") {
                error = "Local Drive: " + dn;
            }
            else if (dn.slice(0,1) == "S") {
                error = "Network Drive: " + dn;
            }
            else if (dn != "") {
                error = "Bad Drive: " + dn;
            }

            // If it's an HTTP(S) URL, see if it returns 200 (OK) when opened.
            // The scheme is matched case-insensitively and includes https.
            if (/^https?:/i.test(link)) {
                try {
                    xml.Open("GET", link, false);
                    xml.Send(null);
                    var xs = xml.status;
                    if (xs != 200) {
                        error = "HTTP (" + xs + ")";
                    }
                }
                catch (ex) {
                    error = "Unreachable";
                }
                // The shared `xml` object is deliberately NOT set to null here:
                // doing so made every later HTTP link throw on xml.Open and be
                // logged as "Unreachable".
            }

            logf.WriteLine(fn + "," + link + "," + error + "," + "page=" + page_no + "/ line=" + line_no);
        }//for
    }
    finally {
        // Always close the document, even if a hyperlink could not be read,
        // so failed runs do not leave documents open in Word.
        d.Close(false);
    }
}//function
//================================
BTW, I run this script using the "wscript" tool under Windows. A main program delivers documents one at a time to this function. Again, the script works fine with a few dozen documents, but not with hundreds.