// Used to replace src links that don't have a file extension // If the image src doesn't have a file type: // 1. Create a dummy link // 2. Detect image type from the binary data & create new links // 3. Replace all the dummy links in tmpGlobalContent with the new links var tmpGlobalContent = null var allImages = []; var extractedImages = []; var allowedTags = [ 'address', 'article', 'aside', 'footer', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hgroup', 'nav', 'section', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li', 'main', 'ol', 'p', 'pre', 'ul', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em', 'i', 'img', 'kbd', 'mark', 'q', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'del', 'ins', 'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'math', 'maction', 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mlabeledtr', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mroot', 'mrow', 'ms', 'mspace', 'msqrt', 'mstyle', 'msub', 'msup', 'msubsup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'msgroup', 'mlongdiv', 'mscarries', 'mscarry', 'mstack', 'semantics' // TODO ? // ,'form', 'button' // TODO svg support ? // , 'svg', 'g', 'path', 'line', 'circle', 'text' ]; // const svgTags = ['svg', 'g', 'path', 'line', 'circle', 'text'] var mathMLTags = [ 'math', 'maction', 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mlabeledtr', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mroot', 'mrow', 'ms', 'mspace', 'msqrt', 'mstyle', 'msub', 'msup', 'msubsup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'msgroup', 'mlongdiv', 'mscarries', 'mscarry', 'mstack', 'semantics' ] var cssClassesToTmpIds = {}; var tmpIdsToNewCss = {}; var tmpIdsToNewCssSTRING = {}; // src: https://idpf.github.io/a11y-guidelines/content/style/reference.html var supportedCss = [ 'background-color', 'border', 'color', 'font', 'line-height', 'list-style', 'padding', 'text-align', ]; ////// function getImageSrc(srcTxt) { if (!srcTxt) { return ''; } srcTxt = srcTxt.trim(); if (srcTxt === '') { return ''; } // TODO move srcTxt = srcTxt.replace(/&/g, '&') // TODO - convert with svg sources to jpeg OR add support for svg let fileExtension = getFileExtension(srcTxt); if (fileExtension === '') { fileExtension = "TODO-EXTRACT" } let newImgFileName = 'img-' + generateRandomNumber(true) + '.' + fileExtension; let isB64Img = isBase64Img(srcTxt); if (isB64Img) { extractedImages.push({ filename: newImgFileName, // TODO name data: getBase64ImgData(srcTxt) }); } else { allImages.push({ originalUrl: getImgDownloadUrl(srcTxt), filename: newImgFileName, // TODO name }); } return '../images/' + newImgFileName; } // tested function extractMathMl($htmlObject) { $htmlObject.find('span[id^="MathJax-Element-"]').each(function (i, el) { $(el).replaceWith('' + el.getAttribute('data-mathml') + ''); }); } // tested function extractCanvasToImg($htmlObject) { $htmlObject.find('canvas').each(function (index, elem) { try { let imgUrl = docEl.toDataURL('image/jpeg'); $(elem).replaceWith(''); } catch (e) { console.log(e) } }); } // tested function extractSvgToImg($htmlObject) { let serializer = new XMLSerializer(); $htmlObject.find('svg').each(function (index, elem) { // add width & height because the result image was too big let bbox = elem.getBoundingClientRect() let newWidth = bbox.width let newHeight = bbox.height let svgXml = serializer.serializeToString(elem); let imgSrc = 'data:image/svg+xml;base64,' + window.btoa(svgXml); $(elem).replaceWith('' + ''); }); } // replaces all iframes by divs with the same innerHTML content function extractIFrames() { let allIframes = document.getElementsByTagName('iframe') let changeIFrames = [] let newDivs = [] for (let iFrame of allIframes) { if (!iFrame.contentDocument || !iFrame.contentDocument.body) { continue } let bodyContent = iFrame.contentDocument.body.innerHTML let bbox = iFrame.getBoundingClientRect() let newDiv = document.createElement('div') newDiv.style.width = bbox.width newDiv.style.height = bbox.height newDiv.innerHTML = bodyContent changeIFrames.push(iFrame) newDivs.push(newDiv) } for (let i = 0; i < newDivs.length; i++) { let newDiv = newDivs[i] let iFrame = changeIFrames[i] let iframeParent = iFrame.parentNode iframeParent.replaceChild(newDiv, iFrame) } } function preProcess($htmlObject) { // TODO // $htmlObject.find('script, style, noscript, iframe').remove(); // $('body').find('script, style, noscript, iframe').remove() // $('body').find('script, style, noscript, iframe').contents().remove() // $('body').find('iframe').remove() // $('body').find('*:empty').not('img').not('br').not('hr').remove(); // formatPreCodeElements($('body')); extractMathMl($htmlObject); extractCanvasToImg($htmlObject); extractSvgToImg($htmlObject); } function parseHTML(rawContentString) { allImages = []; extractedImages = []; let results = ''; let lastFragment = ''; let lastTag = ''; try { HTMLParser(rawContentString, { start: function(tag, attrs, unary) { lastTag = tag; if (allowedTags.indexOf(tag) < 0) { return; } if (tag === 'img') { let tmpAttrsTxt = ''; let tmpSrc = '' for (let i = 0; i < attrs.length; i++) { if (attrs[i].name === 'src') { tmpSrc = getImageSrc(attrs[i].value) tmpAttrsTxt += ' src="' + tmpSrc + '"'; } else if (attrs[i].name === 'data-class') { tmpAttrsTxt += ' class="' + attrs[i].value + '"'; } else if (attrs[i].name === 'width') { // used when converting svg to img - the result image was too big tmpAttrsTxt += ' width="' + attrs[i].value + '"'; } else if (attrs[i].name === 'height') { // used when converting svg to img - the result image was too big tmpAttrsTxt += ' height="' + attrs[i].value + '"'; } } if (tmpSrc === '') { // ignore imgs without source lastFragment = '' } else { lastFragment = tmpAttrsTxt.length === 0 ? '' : ''; } } else if (tag === 'a') { let tmpAttrsTxt = ''; for (let i = 0; i < attrs.length; i++) { if (attrs[i].name === 'href') { tmpAttrsTxt += ' href="' + getHref(attrs[i].value) + '"'; } else if (attrs[i].name === 'data-class') { tmpAttrsTxt += ' class="' + attrs[i].value + '"'; } } lastFragment = tmpAttrsTxt.length === 0 ? '' : ''; } else if (tag === 'br' || tag === 'hr') { let tmpAttrsTxt = ''; for (let i = 0; i < attrs.length; i++) { if (attrs[i].name === 'data-class') { tmpAttrsTxt += ' class="' + attrs[i].value + '"'; } } lastFragment = '<' + tag + tmpAttrsTxt + '>'; } else if (tag === 'math') { let tmpAttrsTxt = ''; tmpAttrsTxt += ' xmlns="http://www.w3.org/1998/Math/MathML"'; for (let i = 0; i < attrs.length; i++) { if (attrs[i].name === 'alttext') { tmpAttrsTxt += ' alttext="' + attrs[i].value + '"'; } } lastFragment = '<' + tag + tmpAttrsTxt + '>'; } else { let tmpAttrsTxt = ''; for (let i = 0; i < attrs.length; i++) { if (attrs[i].name === 'data-class') { tmpAttrsTxt += ' class="' + attrs[i].value + '"'; } } lastFragment = '<' + tag + tmpAttrsTxt + '>'; } results += lastFragment; lastFragment = ''; }, end: function(tag) { if (allowedTags.indexOf(tag) < 0 || tag === 'img' || tag === 'br' || tag === 'hr') { return; } results += ""; }, chars: function(text) { if (lastTag !== '' && allowedTags.indexOf(lastTag) < 0) { return; } results += text; }, comment: function(text) { // results += ""; } }); // TODO - (re)move results = results.replace(/ /gi, ' '); return results; } catch (e) { console.log('Error:', e); return 'Error: ' + e //+" " + force($(rawContentString)) } } function getContent(htmlContent) { try { // TODO - move; called multiple times on selection preProcess($('body')) let tmp = document.createElement('div'); tmp.appendChild(htmlContent.cloneNode(true)); let tmpHtml = '
' + tmp.innerHTML + '
'; return parseHTML(tmpHtml); } catch (e) { console.log('Error:', e); return htmlContent; } } ///// function getSelectedNodes() { // if (document.selection) { // return document.selection.createRange().parentElement(); // return document.selection.createRange(); // } let selection = window.getSelection(); let docfrag = []; for (let i = 0; i < selection.rangeCount; i++) { docfrag.push(selection.getRangeAt(i).cloneContents()); } return docfrag; } ///// function extractCss(includeStyle, appliedStyles) { if (includeStyle) { $('body').find('*').each((i, pre) => { let $pre = $(pre); if (allowedTags.indexOf(pre.tagName.toLowerCase()) < 0) return; if (mathMLTags.indexOf(pre.tagName.toLowerCase()) > -1) return; if (!$pre.is(':visible')) { $pre.replaceWith(''); } else { if (pre.tagName.toLowerCase() === 'svg') return; let classNames = pre.getAttribute('class'); if (!classNames) { classNames = pre.getAttribute('id'); if (!classNames) { classNames = pre.tagName + '-' + generateRandomNumber(); } } let tmpName = cssClassesToTmpIds[classNames]; let tmpNewCss = tmpIdsToNewCss[tmpName]; if (!tmpName) { // TODO - collision between class names when multiple pages tmpName = generateRandomTag(2) + i cssClassesToTmpIds[classNames] = tmpName; } if (!tmpNewCss) { tmpNewCss = {}; for (let cssTagName of supportedCss) { let cssValue = $pre.css(cssTagName); if (cssValue && cssValue.length > 0) { tmpNewCss[cssTagName] = cssValue; } } // Reuse CSS - if the same css code was generated for another element, reuse it's class name let tcss = JSON.stringify(tmpNewCss) let found = false if (Object.keys(tmpIdsToNewCssSTRING).length === 0) { tmpIdsToNewCssSTRING[tmpName] = tcss; tmpIdsToNewCss[tmpName] = tmpNewCss; } else { for (const key in tmpIdsToNewCssSTRING) { if (tmpIdsToNewCssSTRING[key] === tcss) { tmpName = key found = true break } } if (!found) { tmpIdsToNewCssSTRING[tmpName] = tcss; tmpIdsToNewCss[tmpName] = tmpNewCss; } } } pre.setAttribute('data-class', tmpName); } }); return jsonToCss(tmpIdsToNewCss); } else { // remove hidden elements when style is not included $('body').find('*').each((i, pre) => { let $pre = $(pre) if (!$pre.is(':visible')) { $pre.replaceWith('') } }) let mergedCss = ''; if (appliedStyles && appliedStyles.length > 0) { for (let i = 0; i < appliedStyles.length; i++) { mergedCss += appliedStyles[i].style; } return mergedCss; } } return null } ///// function deferredAddZip(url, filename) { let deferred = $.Deferred(); JSZipUtils.getBinaryContent(url, function(err, data) { if (err) { // deferred.reject(err); TODO console.log('Error:', err); deferred.resolve(); } else { // TODO - move to utils.js if (filename.endsWith("TODO-EXTRACT")) { let oldFilename = filename let arr = (new Uint8Array(data)).subarray(0, 4); let header = ""; for(let i = 0; i < arr.length; i++) { header += arr[i].toString(16); } if (header.startsWith("89504e47")) { filename = filename.replace("TODO-EXTRACT", "png") } else if (header.startsWith("47494638")) { filename = filename.replace("TODO-EXTRACT", "gif") } else if (header.startsWith("ffd8ff")) { filename = filename.replace("TODO-EXTRACT", "jpg") } else { // ERROR console.log("Error! Unable to extract the image type!"); deferred.resolve(); } tmpGlobalContent = tmpGlobalContent.replace(oldFilename, filename) } extractedImages.push({ filename: filename, // TODO - must be JSON serializable data: base64ArrayBuffer(data) }); deferred.resolve(); } }); return deferred; } chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { let imgsPromises = []; let result = {}; let pageSrc = ''; let tmpContent = ''; let styleFile = null; extractIFrames() if (request.type === 'extract-page') { styleFile = extractCss(request.includeStyle, request.appliedStyles) pageSrc = document.getElementsByTagName('body')[0]; tmpContent = getContent(pageSrc); } else if (request.type === 'extract-selection') { styleFile = extractCss(request.includeStyle, request.appliedStyles) pageSrc = getSelectedNodes(); pageSrc.forEach((page) => { tmpContent += getContent(page); }); } tmpGlobalContent = tmpContent allImages.forEach((tmpImg) => { imgsPromises.push(deferredAddZip(tmpImg.originalUrl, tmpImg.filename)); }); $.when.apply($, imgsPromises).done(() => { let tmpTitle = getPageTitle(document.title); result = { url: getPageUrl(tmpTitle), title: tmpTitle, baseUrl: getCurrentUrl(), styleFileContent: styleFile, styleFileName: 'style' + generateRandomNumber() + '.css', images: extractedImages, content: tmpGlobalContent }; sendResponse(result); }).fail((e) => { console.log('Error:', e); sendResponse(null) }); return true; });