Forum Moderators: open
// HTML
<div id="contenteditable">
thïš ïš ä prétty<b></b> thöröûgh štrïng
</div>
// JQuery
// I know jQuery isn't necessary here, but I'm already using it for other things
/**
 * Returns a copy of the string in which every character from U+0080 to
 * U+024F is replaced by its decimal character reference (&#NNN;).
 * Characters outside that range are left untouched.
 */
String.prototype.encodeHTML = function () {
  var latinExtended = /[\u0080-\u024F]/g;

  // Builds the &#NNN; form for a single matched character.
  function toDecimalReference(ch) {
    var code = ch.charCodeAt();
    return '&#' + code + ';';
  }

  return this.replace(latinExtended, toDecimalReference);
};
alert($('#contenteditable').html().encodeHTML());
// Returns
// th&#239;&#353; &#239;&#353; &#228; pr&#233;tty<b></b> th&#246;r&#246;&#251;gh &#353;tr&#239;ng
var entities = {
'à': 'a',
'À': 'A',
// and so on
};
// NOTE(review): as written this statement does not run. The parenthesis
// opened right after "var str =" is never closed, and entities[$1] is an
// ordinary expression that is evaluated once, before replace() is called
// ($1 is not a variable defined anywhere in view). A "$1" back-reference
// only has meaning inside a replacement *string*, so looking up the
// captured digits requires passing a callback function to replace().
var str = ($('#contenteditable').html()
.replace(/&#(\d+);/g, entities[$1]); var entities = {
'224': 'a',
'192': 'A',
// and so on
};
/**
 * Returns a copy of the string in which every character from U+0080 to
 * U+024F that has an entry in the "entities" table (keyed by decimal
 * character code) is replaced by that entry.
 *
 * Characters in the range that have no entry are kept as they are.
 *
 * @returns {string} the converted string
 */
String.prototype.encodeHTML = function () {
  return this.replace(/[\u0080-\u024F]/g, function (v) {
    var plain = entities[v.charCodeAt(0)];
    // replace() inserts the literal text "undefined" when the callback
    // returns undefined, so fall back to the matched character itself.
    return plain === undefined ? v : plain;
  });
};
var str = ($('#contenteditable').html().encodeHTML());
// [edited by: not2easy at 5:44 pm (utc) on Nov 6, 2021]
// [edit reason] disabled smileys [/edit]
var str = 'thïš ïš ä prétty<b></b> thöröûgh štrïng';
// Step 1, convert anything not Basic Latin to decimal reference
// 0B7F is the highest one I could find listed anywhere, although
// it really goes much further
/**
 * Produces a copy of the string where each character in the range
 * U+00A2..U+0B7F is written as a decimal character reference (&#NNN;).
 * Everything outside that range passes through unchanged.
 */
String.prototype.encodeHTML = function () {
  const pattern = /[\u00A2-\u0B7F]/g;
  return this.replace(pattern, (ch) => '&#' + ch.charCodeAt() + ';');
};
// run the encodeHTML function
str = str.encodeHTML();
// Next, create a list of entities that look like Basic Latin characters
// my full list is 950 lines long, I can post it if anyone wants it
// I included decimal references for basic Latin, in case someone
// manually types &#65; in an attempt to sneak by
var entities = {
// &#65; => "A"
65 : 'A',
// "..." stands in for the elided entries of the full list; it is not valid JavaScript
...
1514 : 'n'
};
// Next, convert any decimal references in str to the object value
// defined in "entities"
str.match(/(&#(\d+);)/);
const num = RegExp.$2;
if (entities[num] !== undefined)
str = str.replace(RegExp.$1, entities[num]); e.65 =
e.192 =
e.193 = 'A'; var str = 'thïš ïš ä prétty<b></b> thöröûgh štrïng';
var entities = {
65 : 'A',
// "..." stands in for the elided entries of the full list; it is not valid JavaScript
...
1514 : 'n'
};
/**
 * Returns a copy of the string in which each character in the range
 * U+00A2..U+0B7F is swapped for its entry in the "entities" table
 * (looked up by character code). When the lookup yields a falsy value,
 * the original character is kept.
 */
String.prototype.encodeHTML = function () {
  const swap = (ch) => {
    const replacement = entities[ch.charCodeAt()];
    if (replacement) {
      return replacement;
    }
    return ch;
  };
  return this.replace(/[\u00A2-\u0B7F]/g, swap);
};
str = str.encodeHTML(); var str = 'thïš ïš ä prétty<b></b> thöröûgh štrïng';
// Build the table and run the conversion only when str contains at least
// one character in the \u00A2-\u0B7F range.
if (/[\u00A2-\u0B7F]/.test(str)) {
var entities = {
65 : 'A',
// "..." stands in for the elided entries of the full list; it is not valid JavaScript
...
1514 : 'n'
};
// yada yada yada (presumably the encodeHTML definition goes here; confirm)
str = str.encodeHTML();
}