Forum Moderators: open
I have this bit of code to parse a table in a page, remove all the formatting, and pass the resulting string through to another ASP page via a form.
The idea is to click a link that fires this script, which does what I described above and opens the new page, outputting the table and setting the content type to Excel.
Sadly it won't work with Excel 2k.
I'm wondering if I'm doing something wrong in the JS.
Any help much appreciated.
nat
<script type="text/javascript">
/**
 * Copy the page's tables, stripped of all formatting, into hidden fields of
 * `frm` and post the form to xl.asp in a new window.
 *
 * The first and the last table on the page are deliberately skipped. Each
 * remaining table i is rebuilt as bare <tr>/<th>/<td> markup (the first row
 * becomes header cells) and stored in a hidden input named "h" + i.
 *
 * NOTE: every existing control in `frm` is removed before the new hidden
 * fields are added, so the form should be dedicated to this export.
 *
 * @param {HTMLFormElement} frm  form that receives the hidden fields and is submitted
 */
function excel(frm) {
  var tabs = document.getElementsByTagName("table");
  var i, r, c;

  // frm.elements is a live collection: every removal shifts the remaining
  // entries down, so always take the first entry until none are left.
  while (frm.elements.length > 0) {
    var stale = frm.elements[0];
    stale.parentNode.removeChild(stale);
  }

  // Skip the first and the last table (intentional).
  for (i = 1; i < tabs.length - 1; i++) {
    var source = tabs[i].cloneNode(true);
    var plain = document.createElement("table");

    // rows/cells contain only <tr> and <td>/<th> elements, so whitespace
    // text nodes, captions and thead/tbody wrappers cannot get in the way.
    var rows = source.rows;
    for (r = 0; r < rows.length; r++) {
      var plainRow = document.createElement("tr");
      var cells = rows[r].cells;
      for (c = 0; c < cells.length; c++) {
        var text = cells[c].innerText;
        if (text == null) {
          // Browsers without innerText expose textContent instead.
          text = cells[c].textContent || "";
        }
        // The first row is treated as the header row.
        var plainCell = document.createElement(r === 0 ? "th" : "td");
        plainCell.appendChild(document.createTextNode(text));
        plainRow.appendChild(plainCell);
      }
      plain.appendChild(plainRow);
    }

    var field = document.createElement("input");
    field.setAttribute("type", "hidden");
    field.setAttribute("name", "h" + i);
    field.setAttribute("value", plain.innerHTML);
    frm.appendChild(field);
  }

  // Submit the form that actually holds the new fields to the Excel page.
  frm.action = "xl.asp";
  frm.target = "_blank";
  frm.method = "post";
  frm.submit();
}
</script>
//loop round filling new hidden inputs with table innerHTML
for(i=1;i<tabs.length-1;i++){.... This will miss out the first and the last table (or THE table, if there's just one). This may, of course, be your intention.
//clear out any hidden fields in the form from previous prints
for ( x=0;x<frm.elements.length;x++){
frm.removeChild(frm.elements[x]);
}
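frm.elements is a live collection: each removal shifts the remaining elements down one index, so counting upwards skips every other element. Remove the first element until none are left instead:
while(frm.elements[0]){
frm.removeChild(frm.elements[0])
}
document.createTextNode(td.innerText);
If you're working with IE only, then using innerText is OK (quicker too), but then you don't need to use createTextNode. Just use innerText all the time.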
thanks as always for your well explained reply..
yes, it was the idea to not do the first and last tables, and for the moment IE is all that is needed.
i will mull this lot over, and see what i can change..
ps - if i were to clone the whole thing, then to go through getting rid of all the formatting, can i simply use setAttribute("blah" , "")?
or is there a more cunning way..
nat
This latest dirty exploit pays no mind to standards, it just does what works (very conveniently)in IE. This means that, as well as using innerText, it uses the children collection, which is elements only (childNodes includes other node types, and we'd have to filter). It also uses the _attribute.specified property.
Removing formatting could mean removing attributes, or removing tags, like <b> inside cells. The function below does both.
When looping attributes, we use specified, else we'd be removing attributes that won't show up on outer/innerHTML anyway. This would probably remove colspan attributes, resulting in weirdness. Let's assume that your table doesn't contain any row, or column spanning cells.
Removing formatting tags in cells is done by replacing innerText with innerText. This makes lots of sense, even though it doesn't make any at all. The only problem on the horizon is that if your cells contain structural tags, <p> for eg, they'll go too.
<html><head>
<script>
window.onload = function()
{
var tab = document.getElementById("tab");
var tabCopy = tab.cloneNode(true);
strip(tabCopy);
alert(tabCopy.outerHTML);
}function strip(elm)
{
var k=0, atts = elm.attributes,
L = atts.length, child;/* strip attributes */
for(;k<L;k++){
if(atts[k].specified)
elm.removeAttribute(atts[k].nodeName);
/* ..but it misses this */
elm.removeAttribute("style");
}if(elm.tagName.toLowerCase()=="td")
/* remove tags in cell contents (odd code, I know)*/
elm.innerText = elm.innerText;/* go deeper */
k=0;
while(child=elm.children[k++])
strip(child);
}</script>
</head>
<body><table id="tab" border="1" cellpadding="0" cellspacing="0">
<tr>
<td style="color:red;">one:one</td><td align="right">one:two</td>
</tr>
<tr>
<td style="color:green;">two:one</td><td align="right">two:two</td>
</tr>
<tr>
<td id="text"><b>three:one</b></td><td align="right">three:two</td>
</tr>
</table></body>
</html>
That's just tables stripped. I know there's more to do. I wasn't sure what exactly.
Sorry.
elm.removeAttribute("style")
elm.removeAttribute("className")
But I don't like where this is leading. How many others are there like this, that either aren't showing up as attributes, or not as .specified? I don't know what the deal is here. To add to the confusion, one reference says that the method doesn't remove [class,style,align, + eventhandlers], but it seems to.
I'm considering a U-turn, and building a string instead...
/* Demo: show the bare markup generated for the sample table. */
window.onload = function()
{
  var tab = document.getElementById("tab");
  alert(getBasicHTML(tab));
};

/**
 * Build attribute-free HTML for an element tree.
 *
 * Every element is emitted as a bare <tag>...</tag> pair. Table cells
 * (td/th) contribute their plain text, HTML-escaped; any other element
 * contributes the markup of its element children only.
 *
 * Relies on innerText and the element-only children collection.
 *
 * @param {Element} elm   root element to serialise
 * @param {Array} [arr]   optional array to append the fragments to; a new
 *                        array is used when omitted
 * @returns {string} all fragments in the array joined together
 */
function getBasicHTML(elm, arr)
{
  var parts = arr || [];
  collect(elm);
  /* Join once at the end instead of at every recursion level. */
  return parts.join('');

  /*-- inner fn: make cell text safe to embed in markup --*/
  function escapeText(text){
    return (text == null ? '' : String(text))
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /*-- inner fn: append the fragments for node and its descendants --*/
  function collect(node){
    var tagName = node.tagName.toLowerCase(),
        isCell = tagName === "td" || tagName === "th",
        k = 0, child;
    parts.push('<' + tagName + '>');
    if(isCell)
      parts.push(escapeText(node.innerText));
    else
      while((child = node.children[k++]))
        collect(child);
    parts.push('<\/' + tagName + '>');
  }
}
/**
 * Build attribute-free HTML for an element tree.
 *
 * Every element is emitted as a bare <tag>...</tag> pair. Table cells
 * (td/th) contribute their plain text, HTML-escaped; any other element
 * contributes the markup of its element children only.
 *
 * Relies on innerText and the element-only children collection.
 *
 * @param {Element} elm  root element to serialise
 * @returns {string} the generated markup
 */
function getBasicHTML(elm)
{
  var arr = [];
  repeat(elm);
  return arr.join('');

  /* Make cell text safe to embed in markup. */
  function escapeText(text)
  {
    return (text == null ? '' : String(text))
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /* Append the fragments for node and its descendants to arr. */
  function repeat(node)
  {
    var tagName = node.tagName.toLowerCase(),
        /* Header cells hold text too; treating them as containers would drop it. */
        isCell = tagName === "td" || tagName === "th",
        k = 0, child;
    arr.push('<' + tagName + '>');
    if(isCell)
      arr.push(escapeText(node.innerText));
    else
      while((child = node.children[k++]))
        repeat(child);
    arr.push('<\/' + tagName + '>');
  }
}