mirror of
https://github.com/alexadam/save-as-ebook.git
synced 2025-09-10 09:24:49 +00:00
add support for base64 imgs; don't execute extract scripts multiple times on the same tab
This commit is contained in:
parent
cc68e99a94
commit
0124977a37
3 changed files with 111 additions and 53 deletions
|
@ -1,5 +1,5 @@
|
||||||
var allImgSrc = {};
|
|
||||||
var allImages = [];
|
var allImages = [];
|
||||||
|
var extractedImages = [];
|
||||||
var maxNrOfElements = 10000;
|
var maxNrOfElements = 10000;
|
||||||
//////
|
//////
|
||||||
|
|
||||||
|
@ -7,8 +7,23 @@ function getImageSrc(srcTxt) {
|
||||||
if (!srcTxt) {
|
if (!srcTxt) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
allImgSrc[srcTxt] = 'img-' + (Math.floor(Math.random()*1000000)) + '.' + getFileExtension(srcTxt);
|
var isB64Img = isBase64Img(srcTxt);
|
||||||
return '../images/' + allImgSrc[srcTxt];
|
var fileExtension = getFileExtension(srcTxt);
|
||||||
|
var newImgFileName = 'img-' + (Math.floor(Math.random()*1000000*Math.random()*100000)) + '.' + fileExtension;
|
||||||
|
|
||||||
|
if (isB64Img) {
|
||||||
|
extractedImages.push({
|
||||||
|
filename: newImgFileName, // TODO name
|
||||||
|
data: getBase64ImgData(srcTxt)
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
allImages.push({
|
||||||
|
originalUrl: getImgDownloadUrl(srcTxt),
|
||||||
|
filename: newImgFileName, // TODO name
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return '../images/' + newImgFileName;
|
||||||
}
|
}
|
||||||
|
|
||||||
function generateRandomTag() {
|
function generateRandomTag() {
|
||||||
|
@ -98,11 +113,11 @@ function force(contentString) {
|
||||||
var $content = $(contentString);
|
var $content = $(contentString);
|
||||||
|
|
||||||
$content.find('img').each(function (index, elem) {
|
$content.find('img').each(function (index, elem) {
|
||||||
$(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src')) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
|
$(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src').trim()) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
|
||||||
});
|
});
|
||||||
|
|
||||||
$content.find('a').each(function (index, elem) {
|
$content.find('a').each(function (index, elem) {
|
||||||
$(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href')) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
|
$(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href').trim()) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
|
||||||
});
|
});
|
||||||
|
|
||||||
if ($('*').length < maxNrOfElements) {
|
if ($('*').length < maxNrOfElements) {
|
||||||
|
@ -144,14 +159,15 @@ function force(contentString) {
|
||||||
|
|
||||||
// https://github.com/blowsie/Pure-JavaScript-HTML5-Parser
|
// https://github.com/blowsie/Pure-JavaScript-HTML5-Parser
|
||||||
function sanitize(rawContentString) {
|
function sanitize(rawContentString) {
|
||||||
allImgSrc = {};
|
allImages = [];
|
||||||
|
extractedImages = [];
|
||||||
var srcTxt = '';
|
var srcTxt = '';
|
||||||
var dirty = null;
|
var dirty = null;
|
||||||
try {
|
try {
|
||||||
// dirty = getHtmlAsString(rawContent);
|
// dirty = getHtmlAsString(rawContent);
|
||||||
wdirty = $.parseHTML(rawContentString);
|
var wdirty = $.parseHTML(rawContentString);
|
||||||
$wdirty = $(wdirty);
|
$wdirty = $(wdirty);
|
||||||
$wdirty.find('script, style, svg, canvas, noscript').remove();
|
$wdirty.find('script, style, svg, canvas, noscript').remove(); // TODO remove iframes
|
||||||
$wdirty.find('*:empty').not('img').remove();
|
$wdirty.find('*:empty').not('img').remove();
|
||||||
|
|
||||||
dirty = '<div>' + $wdirty.html() + '</div>';
|
dirty = '<div>' + $wdirty.html() + '</div>';
|
||||||
|
@ -190,14 +206,14 @@ function sanitize(rawContentString) {
|
||||||
tattrs = attrs.filter(function(attr) {
|
tattrs = attrs.filter(function(attr) {
|
||||||
return attr.name === 'src';
|
return attr.name === 'src';
|
||||||
}).map(function(attr) {
|
}).map(function(attr) {
|
||||||
return getImageSrc(attr.escaped);
|
return getImageSrc(decodeHtmlEntity(attr.value).trim());
|
||||||
});
|
});
|
||||||
lastFragment = tattrs.length === 0 ? '<img></img>' : '<img src="' + tattrs[0] + '" alt=""></img>';
|
lastFragment = tattrs.length === 0 ? '<img></img>' : '<img src="' + tattrs[0] + '" alt=""></img>';
|
||||||
} else if (tag === 'a') {
|
} else if (tag === 'a') {
|
||||||
tattrs = attrs.filter(function(attr) {
|
tattrs = attrs.filter(function(attr) {
|
||||||
return attr.name === 'href';
|
return attr.name === 'href';
|
||||||
}).map(function(attr) {
|
}).map(function(attr) {
|
||||||
return getHref(attr.escaped);
|
return getHref(decodeHtmlEntity(attr.value).trim());
|
||||||
});
|
});
|
||||||
lastFragment = tattrs.length === 0 ? '<a>' : '<a href="' + tattrs[0] + '">';
|
lastFragment = tattrs.length === 0 ? '<a>' : '<a href="' + tattrs[0] + '">';
|
||||||
} else {
|
} else {
|
||||||
|
@ -282,18 +298,18 @@ function getSelectedNodes() {
|
||||||
|
|
||||||
/////
|
/////
|
||||||
|
|
||||||
function deferredAddZip(url, filename, zip) {
|
function deferredAddZip(url, filename) {
|
||||||
var deferred = $.Deferred();
|
var deferred = $.Deferred();
|
||||||
JSZipUtils.getBinaryContent(url, function(err, data) {
|
JSZipUtils.getBinaryContent(url, function(err, data) {
|
||||||
if (err) {
|
if (err) {
|
||||||
// deferred.reject(err); TODO
|
// deferred.reject(err); TODO
|
||||||
|
console.log('Error:', err);
|
||||||
deferred.resolve();
|
deferred.resolve();
|
||||||
} else {
|
} else {
|
||||||
var tmpImg = {
|
extractedImages.push({
|
||||||
filename: filename,
|
filename: filename,
|
||||||
data: base64ArrayBuffer(data)
|
data: base64ArrayBuffer(data)
|
||||||
};
|
});
|
||||||
allImages.push(tmpImg);
|
|
||||||
deferred.resolve();
|
deferred.resolve();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -301,10 +317,7 @@ function deferredAddZip(url, filename, zip) {
|
||||||
}
|
}
|
||||||
|
|
||||||
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
||||||
console.log('Extract Html...');
|
|
||||||
var imgsPromises = [];
|
var imgsPromises = [];
|
||||||
allImgSrc = {};
|
|
||||||
allImages = [];
|
|
||||||
var result = {};
|
var result = {};
|
||||||
var pageSrc = '';
|
var pageSrc = '';
|
||||||
var tmpContent = '';
|
var tmpContent = '';
|
||||||
|
@ -317,19 +330,19 @@ chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
||||||
pageSrc.forEach(function (page) {
|
pageSrc.forEach(function (page) {
|
||||||
tmpContent += getContent(page);
|
tmpContent += getContent(page);
|
||||||
});
|
});
|
||||||
|
} else if (request.type === 'echo') {
|
||||||
|
sendResponse({
|
||||||
|
echo: true
|
||||||
|
});
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tmpContent.trim() === '') {
|
if (tmpContent.trim() === '') {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Object.keys(allImgSrc).forEach(function(imgSrc, index) {
|
allImages.forEach(function (tmpImg) {
|
||||||
try {
|
imgsPromises.push(deferredAddZip(tmpImg.originalUrl, tmpImg.filename));
|
||||||
var tmpDeffered = deferredAddZip(getImgDownloadUrl(imgSrc), allImgSrc[imgSrc]);
|
|
||||||
imgsPromises.push(tmpDeffered);
|
|
||||||
} catch (e) {
|
|
||||||
console.log('Error:', e);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
$.when.apply($, imgsPromises).done(function() {
|
$.when.apply($, imgsPromises).done(function() {
|
||||||
|
@ -337,7 +350,7 @@ chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
|
||||||
url: getPageUrl(document.title),
|
url: getPageUrl(document.title),
|
||||||
title: getPageTitle(document.title),
|
title: getPageTitle(document.title),
|
||||||
baseUrl: getCurrentUrl(),
|
baseUrl: getCurrentUrl(),
|
||||||
images: allImages,
|
images: extractedImages,
|
||||||
content: tmpContent
|
content: tmpContent
|
||||||
};
|
};
|
||||||
sendResponse(result);
|
sendResponse(result);
|
||||||
|
|
|
@ -25,8 +25,6 @@ document.getElementById("editChapters").onclick = function() {
|
||||||
|
|
||||||
window.close();
|
window.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
function dispatch(action, justAddToBuffer) {
|
function dispatch(action, justAddToBuffer) {
|
||||||
|
@ -37,7 +35,10 @@ function dispatch(action, justAddToBuffer) {
|
||||||
currentWindow: true,
|
currentWindow: true,
|
||||||
active: true
|
active: true
|
||||||
}, function(tab) {
|
}, function(tab) {
|
||||||
|
chrome.tabs.sendMessage(tab[0].id, {
|
||||||
|
type: 'echo'
|
||||||
|
}, function(response) {
|
||||||
|
if (!response) {
|
||||||
chrome.tabs.executeScript(tab[0].id, {file: '/jquery.js'});
|
chrome.tabs.executeScript(tab[0].id, {file: '/jquery.js'});
|
||||||
chrome.tabs.executeScript(tab[0].id, {file: '/utils.js'});
|
chrome.tabs.executeScript(tab[0].id, {file: '/utils.js'});
|
||||||
chrome.tabs.executeScript(tab[0].id, {file: '/filesaver.js'});
|
chrome.tabs.executeScript(tab[0].id, {file: '/filesaver.js'});
|
||||||
|
@ -48,7 +49,17 @@ function dispatch(action, justAddToBuffer) {
|
||||||
chrome.tabs.executeScript(tab[0].id, {
|
chrome.tabs.executeScript(tab[0].id, {
|
||||||
file: 'extractHtml.js'
|
file: 'extractHtml.js'
|
||||||
}, function() {
|
}, function() {
|
||||||
chrome.tabs.sendMessage(tab[0].id, {
|
sendMessage(tab[0].id, action, justAddToBuffer);
|
||||||
|
});
|
||||||
|
} else if (response.echo) {
|
||||||
|
sendMessage(tab[0].id, action, justAddToBuffer);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendMessage(tabId, action, justAddToBuffer) {
|
||||||
|
chrome.tabs.sendMessage(tabId, {
|
||||||
type: action
|
type: action
|
||||||
}, function(response) {
|
}, function(response) {
|
||||||
if (!justAddToBuffer) {
|
if (!justAddToBuffer) {
|
||||||
|
@ -61,8 +72,6 @@ function dispatch(action, justAddToBuffer) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
document.getElementById('savePage').onclick = function() {
|
document.getElementById('savePage').onclick = function() {
|
||||||
|
|
|
@ -39,7 +39,14 @@ function getOriginUrl() {
|
||||||
|
|
||||||
function getFileExtension(fileName) {
|
function getFileExtension(fileName) {
|
||||||
try {
|
try {
|
||||||
var tmpFileName = fileName.split('.').pop();
|
var tmpFileName = '';
|
||||||
|
|
||||||
|
if (isBase64Img(fileName)) {
|
||||||
|
tmpFileName = getBase64ImgType(fileName);
|
||||||
|
} else {
|
||||||
|
tmpFileName = fileName.split('.').pop();
|
||||||
|
}
|
||||||
|
|
||||||
if (tmpFileName.indexOf('?') > 0) {
|
if (tmpFileName.indexOf('?') > 0) {
|
||||||
tmpFileName = tmpFileName.split('?')[0];
|
tmpFileName = tmpFileName.split('?')[0];
|
||||||
}
|
}
|
||||||
|
@ -47,12 +54,12 @@ function getFileExtension(fileName) {
|
||||||
if (tmpFileName === 'jpg') {
|
if (tmpFileName === 'jpg') {
|
||||||
tmpFileName = 'jpeg';
|
tmpFileName = 'jpeg';
|
||||||
} else if (tmpFileName.trim() === '') {
|
} else if (tmpFileName.trim() === '') {
|
||||||
return 'jpeg'; //TODO
|
return '';
|
||||||
}
|
}
|
||||||
return tmpFileName;
|
return tmpFileName;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('Error:', e);
|
console.log('Error:', e);
|
||||||
return 'jpeg'; //TODO
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,3 +128,32 @@ function base64ArrayBuffer(arrayBuffer) {
|
||||||
|
|
||||||
return base64;
|
return base64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// http://stackoverflow.com/questions/7394748/whats-the-right-way-to-decode-a-string-that-has-special-html-entities-in-it
/**
 * Decodes numeric HTML character references in a string.
 *
 * Both decimal (`&#65;`) and hexadecimal (`&#x41;`) references are decoded.
 * Named entities such as `&amp;` are intentionally left untouched, exactly
 * as before. References outside the Unicode range (> 0x10FFFF) are kept
 * verbatim instead of being decoded to an unrelated character.
 *
 * @param {string} str - Text that may contain numeric character references.
 * @returns {string} The decoded text; '' when `str` is null or undefined.
 */
function decodeHtmlEntity(str) {
    if (str === null || str === undefined) {
        return '';
    }
    return String(str).replace(/&#(?:[xX]([0-9a-fA-F]+)|(\d+));/g, function(match, hex, dec) {
        var codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);
        // Also rejects NaN/Infinity produced by absurdly long digit runs.
        if (!(codePoint <= 0x10FFFF)) {
            return match;
        }
        if (codePoint <= 0xFFFF) {
            return String.fromCharCode(codePoint);
        }
        // String.fromCharCode truncates to 16 bits, so astral code points
        // have to be emitted as an explicit UTF-16 surrogate pair.
        codePoint -= 0x10000;
        return String.fromCharCode(0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
    });
}
|
||||||
|
|
||||||
|
/**
 * Tells whether an image source is an inline base64 data URI.
 *
 * The value must start with `data:image/` and contain the `;base64,`
 * marker somewhere after the start.
 *
 * @param {string} srcTxt - The image `src` value to inspect.
 * @returns {boolean} true for a base64 image data URI; false otherwise,
 *     including for non-string input (which previously threw a TypeError).
 */
function isBase64Img(srcTxt) {
    if (typeof srcTxt !== 'string') {
        return false;
    }
    return srcTxt.indexOf('data:image/') === 0 && srcTxt.indexOf(';base64,') > 0;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the image subtype from a data URI, e.g. 'png' from
 * 'data:image/png;base64,...'.
 *
 * The subtype is whatever sits between the first '/' and the next '/' or
 * the first ';', returned unmodified (so 'svg+xml' stays 'svg+xml').
 *
 * @param {string} srcTxt - A data URI.
 * @returns {string} The subtype, or '' when the input is not a string or
 *     has no '/subtype' part (previously `undefined` leaked out in that case).
 */
function getBase64ImgType(srcTxt) {
    if (typeof srcTxt !== 'string') {
        return '';
    }
    var mediaType = srcTxt.split(';')[0];
    var subtype = mediaType.split('/')[1];
    // split() yields undefined when there is no '/', normalise to ''.
    return subtype || '';
}
|
||||||
|
|
||||||
|
/**
 * Returns the base64 payload of a data URI, i.e. the text that follows the
 * ';base64,' marker.
 *
 * @param {string} srcTxt - A data URI such as 'data:image/png;base64,....'.
 * @returns {string} The payload, or '' when the input is not a string or
 *     does not contain the marker (previously `undefined` leaked out when
 *     the marker was missing).
 */
function getBase64ImgData(srcTxt) {
    if (typeof srcTxt !== 'string') {
        return '';
    }
    var parts = srcTxt.split(';base64,');
    // A single part means the marker never occurred.
    return parts.length > 1 ? parts[1] : '';
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue