Spudhead
05-31-2005, 05:12 PM
At possible risk of contravening the "must be useful" posting guidelines, and at definite risk of looking like a complete saddo, may I hesitantly submit to the board my Amazing Word Counting Thing.
Originally started as a reply to the post here:
http://www.codingforums.com/showthread.php?t=59859
It's now been enhanced with additional statistics-generating feature-bloat, graphs, charts and a half-hearted attempt to tidy up the code a bit.
Use it, love it, critique it, throw spatulas at it and call it Shirley.
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Word Frequency Counter Thing</title>
<style type="text/css">
/* Shared look for the bar-chart table (.graph) and the statistics tables (.stats). */
.graph,
.stats {
    border: 1px dashed #cccccc;
    font-family: Arial;
    font-size: 8pt;
    background: url(img/gradient.jpg);
    background-position: top center;
    background-repeat: repeat-x;
    float: left;
    margin: 10px;
}
/* Header cells of the statistics tables. */
.stats th {
    text-align: left;
    color: #FFFF33;
}
/* Links: black in the normal and visited states ... */
a:link,
a:visited {
    font-family: Arial;
    font-size: 8pt;
    color: #000000;
}
/* ... dark blue and not underlined while hovered. */
a:hover {
    font-family: Arial;
    font-size: 8pt;
    color: #072E69;
    text-decoration: none;
}
</style>
<script language="JavaScript">
// Page-wide state shared by the functions in this script.

// Markup of the show/hide link that the report functions append to their output.
var statsControl = "<a href='#' onClick='toggleStats();'>show full stats</a>";
// false: the report functions use the short (10 row) listing; true: every row.
var showFullStats = false;
// Key of the report currently selected: "freq" or "leng".
var whichStats = "freq";
function toggleStats(){
    /*
    Click handler of the show/hide link.
    Inverts the global showFullStats flag, rebuilds the statsControl link
    markup so its label matches the new state, and then re-runs the report
    selected by whichStats so the page reflects the change.
    */
    showFullStats = (showFullStats == false);
    var linkLabel = (showFullStats == false) ? "show full stats" : "hide full stats";
    statsControl = "<a href='#' onClick='toggleStats();'>" + linkLabel + "</a>";

    // Any other whichStats value leaves the report untouched.
    if (whichStats === "freq"){
        calculateByFrequency();
    } else if (whichStats === "leng"){
        calculateByLength();
    }
}
function graph(arr){
    /*
    Builds a bar chart as an HTML table string.
    arr is a 2-dimensional array where:
      arr[x][0] is the field name (printed as the label under the bar)
      arr[x][1] is the field value (the bar is value * 10 pixels high)
    Only the first 10 entries are charted.
    Field names are HTML-escaped, so text containing &, < or > is shown
    literally instead of being parsed as markup.
    Returns the table markup; nothing is written to the page here.
    */
    function escapeHtml(value){
        return String(value).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    }

    // Every working variable is local, so a call never overwrites a
    // caller's loop counter or output buffer.
    var reportLength = (arr.length>10) ? 10 : arr.length;
    var barCells = "";
    var labelCells = "";
    for (var i=0;i<reportLength;i++){
        var fieldName = arr[i][0];
        var fieldValue = arr[i][1];
        var barHeight = fieldValue*10;
        barCells += "<td valign=bottom><div width=10 height=" + barHeight + " style='background-color:red; width:10px; height:" + barHeight + "px;'></div></td>";
        labelCells += "<td>" + escapeHtml(fieldName) + "<br/>(" + fieldValue + ")</td>";
    }
    // First row holds the bars, second row the labels.
    return "<table class='graph' cellpadding=0 cellspacing=5 border=0><tr>" + barCells + "</tr><tr>" + labelCells + "</tr></table>";
}
function cleanWord(word){
    /*
    Normalises a token for counting: lower-cases it and removes EVERY
    comma, full stop, double quote, question mark and exclamation mark.
    A global regular expression is used because replace() with a string
    pattern only removes the first occurrence.
    Returns the cleaned string (possibly empty).
    */
    return word.toLowerCase().replace(/[,."?!]/g, "");
}
function sortArray(arrayToSort){
    /*
    takes a 2-dimensional array where:
      array[x][0] is the key
      array[x][1] is a numeric value
    and returns a NEW 2-dimensional array with the items ordered by
    array[x][1], largest first. Items with equal values keep the order
    they had in the input. The input array is not modified and no entry
    is dropped.
    */
    // Remember each item's original position so ties can be broken
    // explicitly instead of relying on the engine's sort being stable.
    var tagged = [];
    for (var i=0;i<arrayToSort.length;i++){
        tagged[i] = {key: arrayToSort[i][0], value: arrayToSort[i][1], position: i};
    }
    tagged.sort(function(a, b){
        return (b.value - a.value) || (a.position - b.position);
    });
    var rv = [];
    for (var j=0;j<tagged.length;j++){
        rv[j] = [tagged[j].key, tagged[j].value];
    }
    return rv;
}
function calculateByFrequency(){
    /*
    Reads the text of element 't1', counts how often each cleaned word
    occurs and writes a report into element 'report': the chart returned
    by graph(), the statsControl link, a totals table, and a ranked table
    of words with their share of all words and of unique words.

    Notes:
      - the text is split on any run of whitespace, so line breaks
        separate words just like spaces do
      - tokens that are empty after cleanWord() are not counted
      - with showFullStats off the table lists at most 10 rows, and never
        more rows than there are distinct words
      - words are HTML-escaped before being placed in the table
    */
    function escapeHtml(value){
        return String(value).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    }

    var text = document.getElementById('t1').value;
    var tokens = text.split(/\s+/);
    var words = [];
    for (var t=0;t<tokens.length;t++){
        var cleaned = cleanWord(tokens[t]);
        if (cleaned !== ""){
            words.push(cleaned);
        }
    }

    // Tally in a single pass. slotByWord maps a prefixed word to its row
    // in wordsFrequencies; the "$" prefix keeps words such as
    // "constructor" from colliding with properties every object inherits.
    var wordsFrequencies = [];
    var slotByWord = {};
    for (var w=0;w<words.length;w++){
        var slotKey = "$" + words[w];
        if (Object.prototype.hasOwnProperty.call(slotByWord, slotKey)){
            wordsFrequencies[slotByWord[slotKey]][1]++;
        } else {
            slotByWord[slotKey] = wordsFrequencies.length;
            wordsFrequencies.push([words[w], 1]);
        }
    }
    var uniqueWordCount = wordsFrequencies.length;

    // Sort array
    var sorted = sortArray(wordsFrequencies);

    // Clamp the short listing to the rows that actually exist.
    var statsLength = (showFullStats == false) ? Math.min(10, sorted.length) : sorted.length;

    // create HTML output
    var strOutput = graph(sorted);
    strOutput += statsControl;
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th>Total words</th><td>" + words.length + "</td></tr>";
    strOutput += "<tr><th>Total unique words</th><td>" + uniqueWordCount + "</td></tr>";
    strOutput += "</table>";
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th> </th><th>Word</th><th>Count</th><th>% Total</th><th>% Unique</th></tr>";
    for (var r=0;r<statsLength;r++){
        var pcTotal = ((sorted[r][1]/words.length)*100).toFixed(2);
        var pcUnique = ((sorted[r][1]/uniqueWordCount)*100).toFixed(2);
        strOutput += "<tr><td>" + (r+1) + "</td><td>" + escapeHtml(sorted[r][0]) + "</td><td>" + sorted[r][1] + "</td><td>" + pcTotal + "</td><td>" + pcUnique + "</td></tr>";
    }
    strOutput += "</table>";
    document.getElementById("report").innerHTML = strOutput;
}
function calculateByLength(){
    /*
    Reads the text of element 't1', counts how many cleaned words there
    are of each length and writes a report into element 'report': the
    chart returned by graph(), the statsControl link and a table of word
    lengths ranked by how many words have that length.

    Notes:
      - the text is split on any run of whitespace, so line breaks
        separate words just like spaces do
      - tokens that are empty after cleanWord() are not counted
      - with showFullStats off the table lists at most 10 rows, and never
        more rows than there are distinct word lengths
    */
    var text = document.getElementById('t1').value;
    var tokens = text.split(/\s+/);
    var words = [];
    for (var t=0;t<tokens.length;t++){
        var cleaned = cleanWord(tokens[t]);
        if (cleaned !== ""){
            words.push(cleaned);
        }
    }

    // countByLength[n] is the number of words that are n characters long.
    var countByLength = [];
    var maxWordLength = 0;
    for (var w=0;w<words.length;w++){
        var wordLength = words[w].length;
        countByLength[wordLength] = (countByLength[wordLength] || 0) + 1;
        if (wordLength > maxWordLength){
            maxWordLength = wordLength;
        }
    }

    // Collect [length, count] pairs, longest length first, skipping
    // lengths that no word has.
    var lengthFrequencies = [];
    for (var len=maxWordLength;len>0;len--){
        if (countByLength[len] > 0){
            lengthFrequencies.push([len, countByLength[len]]);
        }
    }

    // sort list by length frequencies instead of descending length order
    var sorted = sortArray(lengthFrequencies);

    // Clamp the short listing to the rows that actually exist.
    var statsLength = (showFullStats == false) ? Math.min(10, sorted.length) : sorted.length;

    // create HTML output
    var strOutput = graph(sorted);
    strOutput += statsControl;
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th> </th><th>Word Length</th><th>Count</th><th>% Total</th></tr>";
    for (var r=0;r<statsLength;r++){
        var pcTotal = ((sorted[r][1]/words.length)*100).toFixed(2);
        strOutput += "<tr><td>" + (r+1) + "</td><td>" + sorted[r][0] + "</td><td>" + sorted[r][1] + "</td><td>" + pcTotal + "</td></tr>";
    }
    strOutput += "</table>";
    document.getElementById("report").innerHTML = strOutput;
}
function initCalc(){
    /*
    Click handler of the Calculate button.
    Stores the value of the checked 'r1' radio button of form 'f1' in the
    global whichStats, then runs the matching report:
      "freq" -> calculateByFrequency()
      "leng" -> calculateByLength()
    If no button is checked, whichStats keeps its previous value.
    */
    var options = document.f1.r1;
    // The counter is local so it cannot clash with counters used elsewhere.
    for (var i=0;i<options.length;i++){
        if (options[i].checked){
            whichStats = options[i].value;
        }
    }
    switch (whichStats){
        case "freq": {
            calculateByFrequency();
            break;
        }
        case "leng": {
            calculateByLength();
            break;
        }
    }
}
</script>
</head>
<body>
<form id="f1" name="f1">
<textarea id="t1" name="t1" rows=6 cols=100>
Ten years ago a crack commando unit was sent to prison by a military court for a crime they didn't commit. These men promptly escaped from a maximum security stockade to the Los Angeles underground. Today, still wanted by the government, they survive as soldiers of fortune. If you have a problem and no one else can help, and if you can find them, maybe you can hire the A-team.
Knight Rider, a shadowy fight into the dangerous world of a man who does not exist. Michael Knight, a young loner on a crusade to champion the cause of the innocent, the helpless in a world of criminals who operate above the law.
</textarea>
<br/>
<!-- Report selector. The script reads this group through its shared name "r1"; each input has its own id because ids must be unique in a document. -->
<input type="radio" name="r1" id="r1_freq" value="freq" checked="checked">By word frequency<br/>
<input type="radio" name="r1" id="r1_leng" value="leng">By word length<br/>
<br/>
<input type="button" name="b1" id="b1" value="Calculate" onClick="initCalc();">
</form>
<br/><br/>
<div id="report" name="report"></div>
</body>
</html>
Originally started as a reply to the post here:
http://www.codingforums.com/showthread.php?t=59859
It's now been enhanced with additional statistics-generating feature-bloat, graphs, charts and a half-hearted attempt to tidy up the code a bit.
Use it, love it, critique it, throw spatulas at it and call it Shirley.
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Word Frequency Counter Thing</title>
<style type="text/css">
/* Shared look for the bar-chart table (.graph) and the statistics tables (.stats). */
.graph,
.stats {
    border: 1px dashed #cccccc;
    font-family: Arial;
    font-size: 8pt;
    background: url(img/gradient.jpg);
    background-position: top center;
    background-repeat: repeat-x;
    float: left;
    margin: 10px;
}
/* Header cells of the statistics tables. */
.stats th {
    text-align: left;
    color: #FFFF33;
}
/* Links: black in the normal and visited states ... */
a:link,
a:visited {
    font-family: Arial;
    font-size: 8pt;
    color: #000000;
}
/* ... dark blue and not underlined while hovered. */
a:hover {
    font-family: Arial;
    font-size: 8pt;
    color: #072E69;
    text-decoration: none;
}
</style>
<script language="JavaScript">
// Page-wide state shared by the functions in this script.

// Markup of the show/hide link that the report functions append to their output.
var statsControl = "<a href='#' onClick='toggleStats();'>show full stats</a>";
// false: the report functions use the short (10 row) listing; true: every row.
var showFullStats = false;
// Key of the report currently selected: "freq" or "leng".
var whichStats = "freq";
function toggleStats(){
    /*
    Click handler of the show/hide link.
    Inverts the global showFullStats flag, rebuilds the statsControl link
    markup so its label matches the new state, and then re-runs the report
    selected by whichStats so the page reflects the change.
    */
    showFullStats = (showFullStats == false);
    var linkLabel = (showFullStats == false) ? "show full stats" : "hide full stats";
    statsControl = "<a href='#' onClick='toggleStats();'>" + linkLabel + "</a>";

    // Any other whichStats value leaves the report untouched.
    if (whichStats === "freq"){
        calculateByFrequency();
    } else if (whichStats === "leng"){
        calculateByLength();
    }
}
function graph(arr){
    /*
    Builds a bar chart as an HTML table string.
    arr is a 2-dimensional array where:
      arr[x][0] is the field name (printed as the label under the bar)
      arr[x][1] is the field value (the bar is value * 10 pixels high)
    Only the first 10 entries are charted.
    Field names are HTML-escaped, so text containing &, < or > is shown
    literally instead of being parsed as markup.
    Returns the table markup; nothing is written to the page here.
    */
    function escapeHtml(value){
        return String(value).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    }

    // Every working variable is local, so a call never overwrites a
    // caller's loop counter or output buffer.
    var reportLength = (arr.length>10) ? 10 : arr.length;
    var barCells = "";
    var labelCells = "";
    for (var i=0;i<reportLength;i++){
        var fieldName = arr[i][0];
        var fieldValue = arr[i][1];
        var barHeight = fieldValue*10;
        barCells += "<td valign=bottom><div width=10 height=" + barHeight + " style='background-color:red; width:10px; height:" + barHeight + "px;'></div></td>";
        labelCells += "<td>" + escapeHtml(fieldName) + "<br/>(" + fieldValue + ")</td>";
    }
    // First row holds the bars, second row the labels.
    return "<table class='graph' cellpadding=0 cellspacing=5 border=0><tr>" + barCells + "</tr><tr>" + labelCells + "</tr></table>";
}
function cleanWord(word){
    /*
    Normalises a token for counting: lower-cases it and removes EVERY
    comma, full stop, double quote, question mark and exclamation mark.
    A global regular expression is used because replace() with a string
    pattern only removes the first occurrence.
    Returns the cleaned string (possibly empty).
    */
    return word.toLowerCase().replace(/[,."?!]/g, "");
}
function sortArray(arrayToSort){
    /*
    takes a 2-dimensional array where:
      array[x][0] is the key
      array[x][1] is a numeric value
    and returns a NEW 2-dimensional array with the items ordered by
    array[x][1], largest first. Items with equal values keep the order
    they had in the input. The input array is not modified and no entry
    is dropped.
    */
    // Remember each item's original position so ties can be broken
    // explicitly instead of relying on the engine's sort being stable.
    var tagged = [];
    for (var i=0;i<arrayToSort.length;i++){
        tagged[i] = {key: arrayToSort[i][0], value: arrayToSort[i][1], position: i};
    }
    tagged.sort(function(a, b){
        return (b.value - a.value) || (a.position - b.position);
    });
    var rv = [];
    for (var j=0;j<tagged.length;j++){
        rv[j] = [tagged[j].key, tagged[j].value];
    }
    return rv;
}
function calculateByFrequency(){
    /*
    Reads the text of element 't1', counts how often each cleaned word
    occurs and writes a report into element 'report': the chart returned
    by graph(), the statsControl link, a totals table, and a ranked table
    of words with their share of all words and of unique words.

    Notes:
      - the text is split on any run of whitespace, so line breaks
        separate words just like spaces do
      - tokens that are empty after cleanWord() are not counted
      - with showFullStats off the table lists at most 10 rows, and never
        more rows than there are distinct words
      - words are HTML-escaped before being placed in the table
    */
    function escapeHtml(value){
        return String(value).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
    }

    var text = document.getElementById('t1').value;
    var tokens = text.split(/\s+/);
    var words = [];
    for (var t=0;t<tokens.length;t++){
        var cleaned = cleanWord(tokens[t]);
        if (cleaned !== ""){
            words.push(cleaned);
        }
    }

    // Tally in a single pass. slotByWord maps a prefixed word to its row
    // in wordsFrequencies; the "$" prefix keeps words such as
    // "constructor" from colliding with properties every object inherits.
    var wordsFrequencies = [];
    var slotByWord = {};
    for (var w=0;w<words.length;w++){
        var slotKey = "$" + words[w];
        if (Object.prototype.hasOwnProperty.call(slotByWord, slotKey)){
            wordsFrequencies[slotByWord[slotKey]][1]++;
        } else {
            slotByWord[slotKey] = wordsFrequencies.length;
            wordsFrequencies.push([words[w], 1]);
        }
    }
    var uniqueWordCount = wordsFrequencies.length;

    // Sort array
    var sorted = sortArray(wordsFrequencies);

    // Clamp the short listing to the rows that actually exist.
    var statsLength = (showFullStats == false) ? Math.min(10, sorted.length) : sorted.length;

    // create HTML output
    var strOutput = graph(sorted);
    strOutput += statsControl;
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th>Total words</th><td>" + words.length + "</td></tr>";
    strOutput += "<tr><th>Total unique words</th><td>" + uniqueWordCount + "</td></tr>";
    strOutput += "</table>";
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th> </th><th>Word</th><th>Count</th><th>% Total</th><th>% Unique</th></tr>";
    for (var r=0;r<statsLength;r++){
        var pcTotal = ((sorted[r][1]/words.length)*100).toFixed(2);
        var pcUnique = ((sorted[r][1]/uniqueWordCount)*100).toFixed(2);
        strOutput += "<tr><td>" + (r+1) + "</td><td>" + escapeHtml(sorted[r][0]) + "</td><td>" + sorted[r][1] + "</td><td>" + pcTotal + "</td><td>" + pcUnique + "</td></tr>";
    }
    strOutput += "</table>";
    document.getElementById("report").innerHTML = strOutput;
}
function calculateByLength(){
    /*
    Reads the text of element 't1', counts how many cleaned words there
    are of each length and writes a report into element 'report': the
    chart returned by graph(), the statsControl link and a table of word
    lengths ranked by how many words have that length.

    Notes:
      - the text is split on any run of whitespace, so line breaks
        separate words just like spaces do
      - tokens that are empty after cleanWord() are not counted
      - with showFullStats off the table lists at most 10 rows, and never
        more rows than there are distinct word lengths
    */
    var text = document.getElementById('t1').value;
    var tokens = text.split(/\s+/);
    var words = [];
    for (var t=0;t<tokens.length;t++){
        var cleaned = cleanWord(tokens[t]);
        if (cleaned !== ""){
            words.push(cleaned);
        }
    }

    // countByLength[n] is the number of words that are n characters long.
    var countByLength = [];
    var maxWordLength = 0;
    for (var w=0;w<words.length;w++){
        var wordLength = words[w].length;
        countByLength[wordLength] = (countByLength[wordLength] || 0) + 1;
        if (wordLength > maxWordLength){
            maxWordLength = wordLength;
        }
    }

    // Collect [length, count] pairs, longest length first, skipping
    // lengths that no word has.
    var lengthFrequencies = [];
    for (var len=maxWordLength;len>0;len--){
        if (countByLength[len] > 0){
            lengthFrequencies.push([len, countByLength[len]]);
        }
    }

    // sort list by length frequencies instead of descending length order
    var sorted = sortArray(lengthFrequencies);

    // Clamp the short listing to the rows that actually exist.
    var statsLength = (showFullStats == false) ? Math.min(10, sorted.length) : sorted.length;

    // create HTML output
    var strOutput = graph(sorted);
    strOutput += statsControl;
    strOutput += "<table class='stats' cellpadding=0 cellspacing=5 border=0>";
    strOutput += "<tr><th> </th><th>Word Length</th><th>Count</th><th>% Total</th></tr>";
    for (var r=0;r<statsLength;r++){
        var pcTotal = ((sorted[r][1]/words.length)*100).toFixed(2);
        strOutput += "<tr><td>" + (r+1) + "</td><td>" + sorted[r][0] + "</td><td>" + sorted[r][1] + "</td><td>" + pcTotal + "</td></tr>";
    }
    strOutput += "</table>";
    document.getElementById("report").innerHTML = strOutput;
}
function initCalc(){
    /*
    Click handler of the Calculate button.
    Stores the value of the checked 'r1' radio button of form 'f1' in the
    global whichStats, then runs the matching report:
      "freq" -> calculateByFrequency()
      "leng" -> calculateByLength()
    If no button is checked, whichStats keeps its previous value.
    */
    var options = document.f1.r1;
    // The counter is local so it cannot clash with counters used elsewhere.
    for (var i=0;i<options.length;i++){
        if (options[i].checked){
            whichStats = options[i].value;
        }
    }
    switch (whichStats){
        case "freq": {
            calculateByFrequency();
            break;
        }
        case "leng": {
            calculateByLength();
            break;
        }
    }
}
</script>
</head>
<body>
<form id="f1" name="f1">
<textarea id="t1" name="t1" rows=6 cols=100>
Ten years ago a crack commando unit was sent to prison by a military court for a crime they didn't commit. These men promptly escaped from a maximum security stockade to the Los Angeles underground. Today, still wanted by the government, they survive as soldiers of fortune. If you have a problem and no one else can help, and if you can find them, maybe you can hire the A-team.
Knight Rider, a shadowy fight into the dangerous world of a man who does not exist. Michael Knight, a young loner on a crusade to champion the cause of the innocent, the helpless in a world of criminals who operate above the law.
</textarea>
<br/>
<!-- Report selector. The script reads this group through its shared name "r1"; each input has its own id because ids must be unique in a document. -->
<input type="radio" name="r1" id="r1_freq" value="freq" checked="checked">By word frequency<br/>
<input type="radio" name="r1" id="r1_leng" value="leng">By word length<br/>
<br/>
<input type="button" name="b1" id="b1" value="Calculate" onClick="initCalc();">
</form>
<br/><br/>
<div id="report" name="report"></div>
</body>
</html>