mirror of
https://github.com/alexadam/save-as-ebook.git
synced 2025-09-10 17:34:47 +00:00
add support for more html elements
This commit is contained in:
parent
9f4b6be294
commit
c8b4771cfe
1 changed files with 17 additions and 105 deletions
|
@ -1,6 +1,15 @@
|
|||
// Module-level arrays, both initially empty. Presumably bookkeeping for images
// found/extracted from the page — their use is not visible in this excerpt; confirm.
var allImages = [];
|
||||
var extractedImages = [];
|
||||
// Element-count ceiling: force() compares $('*').length against this value and
// only runs its per-tag replacement passes when the count is below it.
var maxNrOfElements = 10000;
|
||||
// HTML tag names that force() iterates over: for every element matching one of
// these names it replaces the element with its inner HTML wrapped in the
// temporary tagOpen/tagClose markers.
// NOTE(review): the function-local allowedTags list inside sanitize() also
// contained 'blockquote' and 'strike', which are not listed here — confirm the
// omission is intended if sanitize() is meant to rely on this shared list.
var allowedTags = [
|
||||
'address', 'article', 'aside', 'footer', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'hgroup', 'nav', 'section', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li',
|
||||
'main', 'ol', 'p', 'pre', 'ul', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data',
|
||||
'dfn', 'em', 'i', 'img', 'kbd', 'mark', 'q', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span',
|
||||
'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'del', 'ins', 'caption', 'col', 'colgroup',
|
||||
'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
|
||||
];
|
||||
|
||||
//////
|
||||
|
||||
function getImageSrc(srcTxt) {
|
||||
|
@ -45,108 +54,31 @@ function formatPreCodeElements($jQueryElement) {
|
|||
});
|
||||
}
|
||||
|
||||
// function force3(dirty) {
|
||||
// var tagOpen = '@@@';// + generateRandomTag();
|
||||
// var tagClose = '###';// + generateRandomTag();
|
||||
// var removeElements = ['script', 'style', 'svg', 'canvas', 'noscript'];
|
||||
// var inlineElements = ['h1', 'h2', 'h3', 'sup', 'b', 'i', 'em', 'code', 'pre', 'p'];
|
||||
// var replaceElements = [['li', 'p'], ['tr', 'p']];
|
||||
//
|
||||
// // var bodyClone = document.getElementsByTagName('body')[0].cloneNode(true);
|
||||
//
|
||||
// var bodyClone = document.createElement('div');
|
||||
// bodyClone.innerHTML = dirty;
|
||||
//
|
||||
//
|
||||
// /////
|
||||
//
|
||||
// var imgs = bodyClone.getElementsByTagName('img');
|
||||
// for (var i = 0; i < imgs.length; i++) {
|
||||
// var newImg = document.createElement('span');
|
||||
// newImg.innerHTML = tagOpen + 'img src="' + getImageSrc(imgs[i].getAttribute('src')) + '"' + tagClose + tagOpen + '/img' + tagClose;
|
||||
// imgs[i].parentNode.replaceChild(newImg, imgs[i]);
|
||||
// }
|
||||
//
|
||||
// var links = bodyClone.getElementsByTagName('a');
|
||||
// for (i = 0; i < links.length; i++) {
|
||||
// var newLink = document.createElement('span');
|
||||
// newLink.innerHTML = tagOpen + 'a href="' + getHref(links[i].getAttribute('href')) + '"' + tagClose + links[i].innerHTML + tagOpen + '/a' + tagClose;
|
||||
// links[i].parentNode.replaceChild(newLink, links[i]);
|
||||
// }
|
||||
//
|
||||
// for (i = 0; i < inlineElements.length; i++) {
|
||||
// var tagName = inlineElements[i];
|
||||
// var miscElements = bodyClone.getElementsByTagName(tagName);
|
||||
// for (var j = 0; j < miscElements.length; j++) {
|
||||
// var elemToBeReplaced = miscElements[j];
|
||||
// var newElement = document.createElement('span');
|
||||
// newElement.innerHTML = tagOpen + tagName + tagClose + elemToBeReplaced.innerHTML + tagOpen + '/' + tagName + tagClose;
|
||||
// elemToBeReplaced.parentNode.replaceChild(newElement, elemToBeReplaced);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// for (i = 0; i < replaceElements.length; i++) {
|
||||
// var crtTagPair = replaceElements[i];
|
||||
// var searchForTag = crtTagPair[0];
|
||||
// var replaceWithTag = crtTagPair[1];
|
||||
// var miscElements = bodyClone.getElementsByTagName(searchForTag);
|
||||
// for (var j = 0; j < miscElements.length; j++) {
|
||||
// var elemToBeReplaced = miscElements[j];
|
||||
// var newElement = document.createElement('span');
|
||||
// newElement.innerHTML = tagOpen + replaceWithTag + tagClose + elemToBeReplaced.innerHTML + tagOpen + '/' + replaceWithTag + tagClose;
|
||||
// elemToBeReplaced.parentNode.replaceChild(newElement, elemToBeReplaced);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// var contentString = bodyClone.innerText;
|
||||
//
|
||||
// var tagOpenRegex = new RegExp(tagOpen, 'gi');
|
||||
// var tagCloseRegex = new RegExp(tagClose, 'gi');
|
||||
// contentString = contentString.replace(tagOpenRegex, '<');
|
||||
// contentString = contentString.replace(tagCloseRegex, '>');
|
||||
// contentString = contentString.replace(/&/gi, '&');
|
||||
// contentString = contentString.replace(/&/gi, '&');
|
||||
//
|
||||
// return contentString;
|
||||
//
|
||||
// }
|
||||
|
||||
function force(contentString) {
|
||||
try {
|
||||
var tagOpen = '@@@' + generateRandomTag();
|
||||
var tagClose = '###' + generateRandomTag();
|
||||
var inlineElements = ['h1', 'h2', 'h3', 'sup', 'b', 'i', 'em', 'code', 'pre', 'p'];
|
||||
var replaceElements = [['li', 'p'], ['tr', 'p']];
|
||||
var startEl = '<object>';
|
||||
var endEl = '</object>';
|
||||
|
||||
var $content = $(contentString);
|
||||
|
||||
formatPreCodeElements($content);
|
||||
|
||||
$content.find('img').each(function (index, elem) {
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src').trim()) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
|
||||
$(elem).replaceWith(startEl + tagOpen + 'img src="' + getImageSrc($(elem).attr('src').trim()) + '"' + tagClose + tagOpen + '/img' + tagClose + endEl);
|
||||
});
|
||||
|
||||
$content.find('a').each(function (index, elem) {
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href').trim()) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
|
||||
$(elem).replaceWith(startEl + tagOpen + 'a href="' + getHref($(elem).attr('href').trim()) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + endEl);
|
||||
});
|
||||
|
||||
if ($('*').length < maxNrOfElements) {
|
||||
replaceElements.forEach(function (replacePair) {
|
||||
var searchFor = replacePair[0];
|
||||
var tagName = replacePair[1];
|
||||
var tmpElems = $content.find(searchFor);
|
||||
while (tmpElems.length > 0) {
|
||||
$tmpElem = $(tmpElems[0]);
|
||||
$tmpElem.replaceWith('<span>' + tagOpen + tagName + tagClose + $tmpElem.html() + tagOpen + '/' + tagName + tagClose + '</span>');
|
||||
tmpElems = $content.find(searchFor);
|
||||
}
|
||||
});
|
||||
|
||||
inlineElements.forEach(function (tagName) {
|
||||
allowedTags.forEach(function (tagName) {
|
||||
var tmpElems = $content.find(tagName);
|
||||
while (tmpElems.length > 0) {
|
||||
$tmpElem = $(tmpElems[0]);
|
||||
$tmpElem.replaceWith('<span>' + tagOpen + tagName + tagClose + $tmpElem.html() + tagOpen + '/' + tagName + tagClose + '</span>');
|
||||
$tmpElem.replaceWith(startEl + tagOpen + tagName + tagClose + $tmpElem.html() + tagOpen + '/' + tagName + tagClose + endEl);
|
||||
tmpElems = $content.find(tagName);
|
||||
}
|
||||
});
|
||||
|
@ -158,7 +90,7 @@ function force(contentString) {
|
|||
var tagCloseRegex = new RegExp(tagClose, 'gi');
|
||||
contentString = contentString.replace(tagOpenRegex, '<');
|
||||
contentString = contentString.replace(tagCloseRegex, '>');
|
||||
contentString = contentString.replace(/&nbsp;/gi, ' ');
|
||||
contentString = contentString.replace(/ /gi, ' ');
|
||||
|
||||
return contentString;
|
||||
} catch (e) {
|
||||
|
@ -175,7 +107,7 @@ function sanitize(rawContentString) {
|
|||
try {
|
||||
var wdirty = $.parseHTML(rawContentString);
|
||||
$wdirty = $(wdirty);
|
||||
$wdirty.find('script, style, svg, canvas, noscript').remove(); // TODO remove iframes
|
||||
$wdirty.find('script, style, svg, canvas, noscript, iframe').remove();
|
||||
$wdirty.find('*:empty').not('img').remove();
|
||||
formatPreCodeElements($wdirty);
|
||||
|
||||
|
@ -188,12 +120,6 @@ function sanitize(rawContentString) {
|
|||
var results = '';
|
||||
var lastFragment = '';
|
||||
var lastTag = '';
|
||||
var inList = false;
|
||||
var allowedTags = ['div', 'p', 'code', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', 'blockquote',
|
||||
'img', 'a', 'ol', 'ul', 'li', 'b', 'i', 'sup', 'strong', 'strike',
|
||||
'table', 'tr', 'td', 'th', 'thead', 'tbody', 'pre', 'em'
|
||||
];
|
||||
var allowedTextTags = ['h4', 'h5', 'h6', 'span'];
|
||||
|
||||
HTMLParser(dirty, {
|
||||
start: function(tag, attrs, unary) {
|
||||
|
@ -202,13 +128,6 @@ function sanitize(rawContentString) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (tag === 'ol' || tag === 'ul') {
|
||||
inList = true;
|
||||
}
|
||||
if (tag === 'li' && !inList) {
|
||||
tag = 'p';
|
||||
}
|
||||
|
||||
var tattrs = null;
|
||||
if (tag === 'img') {
|
||||
tattrs = attrs.filter(function(attr) {
|
||||
|
@ -236,13 +155,6 @@ function sanitize(rawContentString) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (tag === 'ol' || tag === 'ul') {
|
||||
inList = false;
|
||||
}
|
||||
if (tag === 'li' && !inList) {
|
||||
tag = 'p';
|
||||
}
|
||||
|
||||
results += "</" + tag + ">\n";
|
||||
},
|
||||
chars: function(text) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue