mirror of
https://github.com/alexadam/save-as-ebook.git
synced 2025-09-10 01:15:09 +00:00
add support for base64 imgs; don't execute extract scripts multiple times on the same tab
This commit is contained in:
parent
cc68e99a94
commit
0124977a37
3 changed files with 111 additions and 53 deletions
|
@ -1,5 +1,5 @@
|
|||
var allImgSrc = {};
|
||||
var allImages = [];
|
||||
var extractedImages = [];
|
||||
var maxNrOfElements = 10000;
|
||||
//////
|
||||
|
||||
|
@ -7,8 +7,23 @@ function getImageSrc(srcTxt) {
|
|||
if (!srcTxt) {
|
||||
return '';
|
||||
}
|
||||
allImgSrc[srcTxt] = 'img-' + (Math.floor(Math.random()*1000000)) + '.' + getFileExtension(srcTxt);
|
||||
return '../images/' + allImgSrc[srcTxt];
|
||||
var isB64Img = isBase64Img(srcTxt);
|
||||
var fileExtension = getFileExtension(srcTxt);
|
||||
var newImgFileName = 'img-' + (Math.floor(Math.random()*1000000*Math.random()*100000)) + '.' + fileExtension;
|
||||
|
||||
if (isB64Img) {
|
||||
extractedImages.push({
|
||||
filename: newImgFileName, // TODO name
|
||||
data: getBase64ImgData(srcTxt)
|
||||
});
|
||||
} else {
|
||||
allImages.push({
|
||||
originalUrl: getImgDownloadUrl(srcTxt),
|
||||
filename: newImgFileName, // TODO name
|
||||
});
|
||||
}
|
||||
|
||||
return '../images/' + newImgFileName;
|
||||
}
|
||||
|
||||
function generateRandomTag() {
|
||||
|
@ -98,11 +113,11 @@ function force(contentString) {
|
|||
var $content = $(contentString);
|
||||
|
||||
$content.find('img').each(function (index, elem) {
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src')) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src').trim()) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
|
||||
});
|
||||
|
||||
$content.find('a').each(function (index, elem) {
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href')) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
|
||||
$(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href').trim()) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
|
||||
});
|
||||
|
||||
if ($('*').length < maxNrOfElements) {
|
||||
|
@ -144,14 +159,15 @@ function force(contentString) {
|
|||
|
||||
// https://github.com/blowsie/Pure-JavaScript-HTML5-Parser
|
||||
function sanitize(rawContentString) {
|
||||
allImgSrc = {};
|
||||
allImages = [];
|
||||
extractedImages = [];
|
||||
var srcTxt = '';
|
||||
var dirty = null;
|
||||
try {
|
||||
// dirty = getHtmlAsString(rawContent);
|
||||
wdirty = $.parseHTML(rawContentString);
|
||||
var wdirty = $.parseHTML(rawContentString);
|
||||
$wdirty = $(wdirty);
|
||||
$wdirty.find('script, style, svg, canvas, noscript').remove();
|
||||
$wdirty.find('script, style, svg, canvas, noscript').remove(); // TODO remove iframes
|
||||
$wdirty.find('*:empty').not('img').remove();
|
||||
|
||||
dirty = '<div>' + $wdirty.html() + '</div>';
|
||||
|
@ -190,14 +206,14 @@ function sanitize(rawContentString) {
|
|||
tattrs = attrs.filter(function(attr) {
|
||||
return attr.name === 'src';
|
||||
}).map(function(attr) {
|
||||
return getImageSrc(attr.escaped);
|
||||
return getImageSrc(decodeHtmlEntity(attr.value).trim());
|
||||
});
|
||||
lastFragment = tattrs.length === 0 ? '<img></img>' : '<img src="' + tattrs[0] + '" alt=""></img>';
|
||||
} else if (tag === 'a') {
|
||||
tattrs = attrs.filter(function(attr) {
|
||||
return attr.name === 'href';
|
||||
}).map(function(attr) {
|
||||
return getHref(attr.escaped);
|
||||
return getHref(decodeHtmlEntity(attr.value).trim());
|
||||
});
|
||||
lastFragment = tattrs.length === 0 ? '<a>' : '<a href="' + tattrs[0] + '">';
|
||||
} else {
|
||||
|
@ -282,18 +298,18 @@ function getSelectedNodes() {
|
|||
|
||||
/////
|
||||
|
||||
function deferredAddZip(url, filename, zip) {
|
||||
function deferredAddZip(url, filename) {
|
||||
var deferred = $.Deferred();
|
||||
JSZipUtils.getBinaryContent(url, function(err, data) {
|
||||
if (err) {
|
||||
// deferred.reject(err); TODO
|
||||
console.log('Error:', err);
|
||||
deferred.resolve();
|
||||
} else {
|
||||
var tmpImg = {
|
||||
extractedImages.push({
|
||||
filename: filename,
|
||||
data: base64ArrayBuffer(data)
|
||||
};
|
||||
allImages.push(tmpImg);
|
||||
});
|
||||
deferred.resolve();
|
||||
}
|
||||
});
|
||||
|
@ -301,10 +317,7 @@ function deferredAddZip(url, filename, zip) {
|
|||
}
|
||||
|
||||
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
||||
console.log('Extract Html...');
|
||||
var imgsPromises = [];
|
||||
allImgSrc = {};
|
||||
allImages = [];
|
||||
var result = {};
|
||||
var pageSrc = '';
|
||||
var tmpContent = '';
|
||||
|
@ -317,19 +330,19 @@ chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
|||
pageSrc.forEach(function (page) {
|
||||
tmpContent += getContent(page);
|
||||
});
|
||||
} else if (request.type === 'echo') {
|
||||
sendResponse({
|
||||
echo: true
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (tmpContent.trim() === '') {
|
||||
return;
|
||||
}
|
||||
|
||||
Object.keys(allImgSrc).forEach(function(imgSrc, index) {
|
||||
try {
|
||||
var tmpDeffered = deferredAddZip(getImgDownloadUrl(imgSrc), allImgSrc[imgSrc]);
|
||||
imgsPromises.push(tmpDeffered);
|
||||
} catch (e) {
|
||||
console.log('Error:', e);
|
||||
}
|
||||
allImages.forEach(function (tmpImg) {
|
||||
imgsPromises.push(deferredAddZip(tmpImg.originalUrl, tmpImg.filename));
|
||||
});
|
||||
|
||||
$.when.apply($, imgsPromises).done(function() {
|
||||
|
@ -337,7 +350,7 @@ chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
|||
url: getPageUrl(document.title),
|
||||
title: getPageTitle(document.title),
|
||||
baseUrl: getCurrentUrl(),
|
||||
images: allImages,
|
||||
images: extractedImages,
|
||||
content: tmpContent
|
||||
};
|
||||
sendResponse(result);
|
||||
|
|
|
@ -25,8 +25,6 @@ document.getElementById("editChapters").onclick = function() {
|
|||
|
||||
window.close();
|
||||
});
|
||||
|
||||
|
||||
};
|
||||
|
||||
function dispatch(action, justAddToBuffer) {
|
||||
|
@ -37,7 +35,10 @@ function dispatch(action, justAddToBuffer) {
|
|||
currentWindow: true,
|
||||
active: true
|
||||
}, function(tab) {
|
||||
|
||||
chrome.tabs.sendMessage(tab[0].id, {
|
||||
type: 'echo'
|
||||
}, function(response) {
|
||||
if (!response) {
|
||||
chrome.tabs.executeScript(tab[0].id, {file: '/jquery.js'});
|
||||
chrome.tabs.executeScript(tab[0].id, {file: '/utils.js'});
|
||||
chrome.tabs.executeScript(tab[0].id, {file: '/filesaver.js'});
|
||||
|
@ -48,7 +49,17 @@ function dispatch(action, justAddToBuffer) {
|
|||
chrome.tabs.executeScript(tab[0].id, {
|
||||
file: 'extractHtml.js'
|
||||
}, function() {
|
||||
chrome.tabs.sendMessage(tab[0].id, {
|
||||
sendMessage(tab[0].id, action, justAddToBuffer);
|
||||
});
|
||||
} else if (response.echo) {
|
||||
sendMessage(tab[0].id, action, justAddToBuffer);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function sendMessage(tabId, action, justAddToBuffer) {
|
||||
chrome.tabs.sendMessage(tabId, {
|
||||
type: action
|
||||
}, function(response) {
|
||||
if (!justAddToBuffer) {
|
||||
|
@ -61,8 +72,6 @@ function dispatch(action, justAddToBuffer) {
|
|||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
document.getElementById('savePage').onclick = function() {
|
||||
|
|
|
@ -39,7 +39,14 @@ function getOriginUrl() {
|
|||
|
||||
function getFileExtension(fileName) {
|
||||
try {
|
||||
var tmpFileName = fileName.split('.').pop();
|
||||
var tmpFileName = '';
|
||||
|
||||
if (isBase64Img(fileName)) {
|
||||
tmpFileName = getBase64ImgType(fileName);
|
||||
} else {
|
||||
tmpFileName = fileName.split('.').pop();
|
||||
}
|
||||
|
||||
if (tmpFileName.indexOf('?') > 0) {
|
||||
tmpFileName = tmpFileName.split('?')[0];
|
||||
}
|
||||
|
@ -47,12 +54,12 @@ function getFileExtension(fileName) {
|
|||
if (tmpFileName === 'jpg') {
|
||||
tmpFileName = 'jpeg';
|
||||
} else if (tmpFileName.trim() === '') {
|
||||
return 'jpeg'; //TODO
|
||||
return '';
|
||||
}
|
||||
return tmpFileName;
|
||||
} catch (e) {
|
||||
console.log('Error:', e);
|
||||
return 'jpeg'; //TODO
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,3 +128,32 @@ function base64ArrayBuffer(arrayBuffer) {
|
|||
|
||||
return base64;
|
||||
}
|
||||
|
||||
// http://stackoverflow.com/questions/7394748/whats-the-right-way-to-decode-a-string-that-has-special-html-entities-in-it
|
||||
/**
 * Decodes numeric HTML character references in a string.
 *
 * Handles both decimal (`&#38;`) and hexadecimal (`&#x26;`) references.
 * Code points above 0xFFFF are emitted as a UTF-16 surrogate pair, because
 * String.fromCharCode alone would truncate them to 16 bits and yield the
 * wrong character. References beyond the Unicode range (> 0x10FFFF) are left
 * untouched. Named entities (e.g. `&amp;`) are not decoded.
 *
 * @param {string} str - text that may contain numeric character references
 * @returns {string} the decoded text; '' when `str` is null or undefined
 */
function decodeHtmlEntity(str) {
    if (str === null || str === undefined) {
        return '';
    }
    return String(str).replace(/&#(?:x([0-9a-f]+)|(\d+));/gi, function(match, hex, dec) {
        var codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);
        if (codePoint > 0x10FFFF) {
            // Not a valid Unicode code point: keep the original text.
            return match;
        }
        if (codePoint > 0xFFFF) {
            // Encode astral code points as a high/low surrogate pair.
            var offset = codePoint - 0x10000;
            return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        }
        return String.fromCharCode(codePoint);
    });
}
|
||||
|
||||
/**
 * Tells whether an image source is an inline base64-encoded data URI.
 *
 * The source qualifies when it starts with `data:image/` and contains the
 * `;base64,` marker somewhere after the start.
 *
 * @param {*} srcTxt - the image `src` value to inspect
 * @returns {boolean} true for a base64 image data URI; false otherwise,
 *     including for any non-string input
 */
function isBase64Img(srcTxt) {
    if (typeof srcTxt !== 'string') {
        // Missing or non-text sources cannot be data URIs; avoid a TypeError.
        return false;
    }
    return srcTxt.indexOf('data:image/') === 0 && srcTxt.indexOf(';base64,') > 0;
}
|
||||
|
||||
/**
 * Extracts the image subtype from a data URI, e.g. `png` from
 * `data:image/png;base64,...`.
 *
 * A structured-syntax suffix is dropped (`svg+xml` becomes `svg`) so the
 * result can be used directly as a file extension.
 *
 * @param {string} srcTxt - a data URI such as `data:image/png;base64,...`
 * @returns {string} the subtype without any `+suffix`; '' when it cannot be
 *     determined (no `/` in the media type, or non-string input)
 */
function getBase64ImgType(srcTxt) {
    try {
        // Media type is everything before the first ';'; subtype follows '/'.
        var subtype = srcTxt.split(';')[0].split('/')[1];
        if (!subtype) {
            return '';
        }
        return subtype.split('+')[0];
    } catch (e) {
        console.log('Error:', e);
        return '';
    }
}
|
||||
|
||||
/**
 * Returns the base64 payload of a data URI, i.e. everything after the first
 * `;base64,` marker.
 *
 * @param {string} srcTxt - a data URI such as `data:image/png;base64,...`
 * @returns {string} the base64-encoded payload; '' when the marker is
 *     missing or the input is not a string
 */
function getBase64ImgData(srcTxt) {
    var marker = ';base64,';
    if (typeof srcTxt !== 'string') {
        return '';
    }
    var markerIndex = srcTxt.indexOf(marker);
    if (markerIndex === -1) {
        // Without the marker there is no payload; never hand back undefined.
        return '';
    }
    return srcTxt.slice(markerIndex + marker.length);
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue