// Images referenced by URL, recorded by getImageSrc as { originalUrl, filename }.
var allImages = [];
// Inline base64 images, recorded by getImageSrc as { filename, data }.
var extractedImages = [];
// Upper bound compared against an element count in force() before its
// element re-wrapping pass runs.
var maxNrOfElements = 10000;

//////

/**
 * Records an image source and returns the relative path that stands in for it.
 *
 * A fresh file name of the form "img-<random integer>.<extension>" is generated
 * on every call. Base64 sources are appended to `extractedImages` together with
 * their data; every other source is appended to `allImages` together with its
 * download URL.
 *
 * @param {string} srcTxt - Raw value of an image `src` attribute.
 * @returns {string} '../images/<generated file name>', or '' when `srcTxt`
 *     is falsy (nothing is recorded in that case).
 */
function getImageSrc(srcTxt) {
    if (!srcTxt) {
        return '';
    }

    var isB64Img = isBase64Img(srcTxt);
    var fileExtension = getFileExtension(srcTxt);
    // Product of two random factors; uniqueness is probabilistic, not guaranteed.
    var newImgFileName = 'img-' + (Math.floor(Math.random() * 1000000 * Math.random() * 100000)) + '.' + fileExtension;

    if (isB64Img) {
        extractedImages.push({
            filename: newImgFileName, // TODO name
            data: getBase64ImgData(srcTxt)
        });
    } else {
        allImages.push({
            originalUrl: getImgDownloadUrl(srcTxt),
            filename: newImgFileName // TODO name
        });
    }

    return '../images/' + newImgFileName;
}

/**
 * Builds a random 5-character string made of ASCII letters (A-Z, a-z).
 *
 * @returns {string} The generated tag.
 */
function generateRandomTag() {
    var text = "";
    var possible = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    for (var i = 0; i < 5; i++) {
        text += possible.charAt(Math.floor(Math.random() * possible.length));
    }

    return text;
}

/**
 * Replaces every `pre` and `code` descendant of the given jQuery set with a
 * string built from that element's `innerText`.
 *
 * NOTE(review): the delimiters around `innerText` are whitespace-only / empty
 * as found in this file; they look like they may once have contained wrapper
 * markup. Confirm against the upstream source before relying on them.
 *
 * @param {jQuery} $jQueryElement - Set whose descendants are rewritten in place.
 */
function formatPreCodeElements($jQueryElement) {
    $jQueryElement.find('pre').each(function (i, pre) {
        $(pre).replaceWith('\n' + pre.innerText + '');
    });

    $jQueryElement.find('code').each(function (i, code) {
        $(code).replaceWith('\n' + code.innerText + '\n');
    });
}
// function force3(dirty) {
// var tagOpen = '@@@';// + generateRandomTag();
// var tagClose = '###';// + generateRandomTag();
// var removeElements = ['script', 'style', 'svg', 'canvas', 'noscript'];
// var inlineElements = ['h1', 'h2', 'h3', 'sup', 'b', 'i', 'em', 'code', 'pre', 'p'];
// var replaceElements = [['li', 'p'], ['tr', 'p']];
//
// // var bodyClone = document.getElementsByTagName('body')[0].cloneNode(true);
//
// var bodyClone = document.createElement('div');
// bodyClone.innerHTML = dirty;
//
//
// /////
//
// var imgs = bodyClone.getElementsByTagName('img');
// for (var i = 0; i < imgs.length; i++) {
// var newImg = document.createElement('span');
// newImg.innerHTML = tagOpen + 'img src="' + getImageSrc(imgs[i].getAttribute('src')) + '"' + tagClose + tagOpen + '/img' + tagClose;
// imgs[i].parentNode.replaceChild(newImg, imgs[i]);
// }
//
// var links = bodyClone.getElementsByTagName('a');
// for (i = 0; i < links.length; i++) {
// var newLink = document.createElement('span');
// newLink.innerHTML = tagOpen + 'a href="' + getHref(links[i].getAttribute('href')) + '"' + tagClose + links[i].innerHTML + tagOpen + '/a' + tagClose;
// links[i].parentNode.replaceChild(newLink, links[i]);
// }
//
// for (i = 0; i < inlineElements.length; i++) {
// var tagName = inlineElements[i];
// var miscElements = bodyClone.getElementsByTagName(tagName);
// for (var j = 0; j < miscElements.length; j++) {
// var elemToBeReplaced = miscElements[j];
// var newElement = document.createElement('span');
// newElement.innerHTML = tagOpen + tagName + tagClose + elemToBeReplaced.innerHTML + tagOpen + '/' + tagName + tagClose;
// elemToBeReplaced.parentNode.replaceChild(newElement, elemToBeReplaced);
// }
// }
//
// for (i = 0; i < replaceElements.length; i++) {
// var crtTagPair = replaceElements[i];
// var searchForTag = crtTagPair[0];
// var replaceWithTag = crtTagPair[1];
// var miscElements = bodyClone.getElementsByTagName(searchForTag);
// for (var j = 0; j < miscElements.length; j++) {
// var elemToBeReplaced = miscElements[j];
// var newElement = document.createElement('span');
// newElement.innerHTML = tagOpen + replaceWithTag + tagClose + elemToBeReplaced.innerHTML + tagOpen + '/' + replaceWithTag + tagClose;
// elemToBeReplaced.parentNode.replaceChild(newElement, elemToBeReplaced);
// }
// }
//
// var contentString = bodyClone.innerText;
//
// var tagOpenRegex = new RegExp(tagOpen, 'gi');
// var tagCloseRegex = new RegExp(tagClose, 'gi');
// contentString = contentString.replace(tagOpenRegex, '<');
// contentString = contentString.replace(tagCloseRegex, '>');
// contentString = contentString.replace(/&/gi, '&');
// contentString = contentString.replace(/&/gi, '&');
//
// return contentString;
//
// }
/**
 * Reduces an HTML string to a small set of tags.
 *
 * How it works: the elements that should survive are replaced by text tokens
 * (`tagOpen` + tag + `tagClose`), all real markup is then dropped by reading
 * the result back with `.text()`, and finally the tokens are turned back into
 * `<` and `>`. The tokens carry a random suffix from generateRandomTag().
 *
 * - `img` becomes an `img` token whose `src` comes from getImageSrc().
 * - `a` becomes an `a` token whose `href` comes from getHref().
 * - `li` and `tr` are rewritten as `p`; h1, h2, h3, sup, b, i, em, code, pre
 *   and p are kept under their own names. This pass only runs while
 *   `$('*').length` is below `maxNrOfElements`.
 *
 * NOTE(review): several literals below are empty strings or identity
 * replacements (`/&/ -> '&'`) as found in this file; they may have lost
 * markup / entity names. Confirm against the upstream source.
 *
 * @param {string} contentString - HTML to convert; passed to `$()`.
 * @returns {string|undefined} The converted markup, or `undefined` when an
 *     exception was caught (the error is logged with console.log).
 */
function force(contentString) {
    try {
        var tagOpen = '@@@' + generateRandomTag();
        var tagClose = '###' + generateRandomTag();
        var inlineElements = ['h1', 'h2', 'h3', 'sup', 'b', 'i', 'em', 'code', 'pre', 'p'];
        var replaceElements = [['li', 'p'], ['tr', 'p']];
        var $content = $(contentString);

        // Replaces `searchFor` elements one at a time with `tagName` tokens around
        // their inner HTML. The set is re-queried after every replacement because
        // the re-inserted inner HTML can itself contain further matches.
        var rewrapAll = function (searchFor, tagName) {
            var matches = $content.find(searchFor);
            while (matches.length > 0) {
                var $match = $(matches[0]);
                $match.replaceWith('' + tagOpen + tagName + tagClose + $match.html() + tagOpen + '/' + tagName + tagClose + '');
                matches = $content.find(searchFor);
            }
        };

        formatPreCodeElements($content);

        $content.find('img').each(function (index, elem) {
            // .attr() yields undefined for a missing attribute; fall back to ''
            // so one attribute-less element does not abort the whole conversion.
            var src = ($(elem).attr('src') || '').trim();
            $(elem).replaceWith('' + tagOpen + 'img src="' + getImageSrc(src) + '"' + tagClose + tagOpen + '/img' + tagClose + '');
        });

        $content.find('a').each(function (index, elem) {
            var href = ($(elem).attr('href') || '').trim();
            $(elem).replaceWith('' + tagOpen + 'a href="' + getHref(href) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '');
        });

        // NOTE(review): this counts the elements of the current document, not
        // only those inside $content — confirm that is the intended guard.
        if ($('*').length < maxNrOfElements) {
            replaceElements.forEach(function (replacePair) {
                rewrapAll(replacePair[0], replacePair[1]);
            });
            inlineElements.forEach(function (tagName) {
                rewrapAll(tagName, tagName);
            });
        }

        // .text() discards every remaining real tag; only the tokens survive.
        contentString = $content.text();

        var tagOpenRegex = new RegExp(tagOpen, 'gi');
        var tagCloseRegex = new RegExp(tagClose, 'gi');
        contentString = contentString.replace(tagOpenRegex, '<');
        contentString = contentString.replace(tagCloseRegex, '>');
        // As written, both replacements below map '&' to itself (see NOTE above).
        contentString = contentString.replace(/&/gi, '&');
        contentString = contentString.replace(/&/gi, '&');

        return contentString;
    } catch (e) {
        // Best effort: report the failure and fall through, returning undefined.
        console.log('Error:', e);
    }
}
// https://github.com/blowsie/Pure-JavaScript-HTML5-Parser
function sanitize(rawContentString) {
allImages = [];
extractedImages = [];
var srcTxt = '';
var dirty = null;
try {
var wdirty = $.parseHTML(rawContentString);
$wdirty = $(wdirty);
$wdirty.find('script, style, svg, canvas, noscript').remove(); // TODO remove iframes
$wdirty.find('*:empty').not('img').remove();
formatPreCodeElements($wdirty);
dirty = '