mirror of
https://github.com/alexadam/save-as-ebook.git
synced 2025-09-10 17:34:47 +00:00
misc
This commit is contained in:
parent
2ef1edad84
commit
5fc8683372
5 changed files with 46 additions and 322 deletions
|
@ -1,257 +0,0 @@
|
|||
// Registry of images found during extraction: keys are the original `src`
// values, values are the generated 'img-<n>.<ext>' file names (filled in by
// getImageSrc, reset by sanitize and by the message listener, and sent back
// to the caller as `imgs`).
var allImgSrc = {};
|
||||
//////
|
||||
|
||||
/**
 * Returns the "directory" part of the current page URL, i.e. everything up to
 * and including the last '/' of the path.
 *
 * Both the query string and the fragment are removed before looking for the
 * last '/', otherwise a hash route such as '#/chapter/2' or a query value
 * containing '/' would be mistaken for part of the path.
 *
 * @returns {string} base URL ending in '/'
 */
function getCurrentUrl() {
    // Drop the fragment first: a '?' may legitimately appear inside it.
    var url = window.location.href.split('#')[0].split('?')[0];
    return url.substring(0, url.lastIndexOf('/') + 1);
}
|
||||
|
||||
/**
 * Extracts the file extension (without the dot) from a file name or URL.
 *
 * The query string and fragment are stripped first and only the last path
 * segment is inspected, so dots in the host name or in query values
 * (e.g. 'a.png?v=1.2') are not mistaken for the extension.
 *
 * @param {string} fileName file name or URL of the resource
 * @returns {string} the extension, or 'jpg' when none can be determined
 */
function getFileExtension(fileName) {
    var path = fileName.split('#')[0].split('?')[0];
    var lastSegment = path.substring(path.lastIndexOf('/') + 1);
    var dotIndex = lastSegment.lastIndexOf('.');
    var extension = dotIndex >= 0 ? lastSegment.substring(dotIndex + 1).trim() : '';

    if (extension === '') {
        return 'jpg'; //TODO
    }
    return extension;
}
|
||||
|
||||
/**
 * Registers an image source in `allImgSrc` and returns the path under which
 * the image will be referenced inside the ebook.
 *
 * A source that was already registered keeps its generated name. Previously
 * each call produced a new random name and overwrote the mapping, so earlier
 * references to the same image pointed at a file name that was no longer in
 * `allImgSrc`.
 *
 * @param {string} srcTxt original `src` attribute of the image
 * @returns {string} '../images/<generated name>', or '' when srcTxt is empty
 */
function getImageSrc(srcTxt) {
    if (!srcTxt) {
        return '';
    }
    if (!Object.prototype.hasOwnProperty.call(allImgSrc, srcTxt)) {
        allImgSrc[srcTxt] = 'img-' + (Math.floor(Math.random() * 1000000)) + '.' + getFileExtension(srcTxt);
    }
    return '../images/' + allImgSrc[srcTxt];
}
|
||||
|
||||
/**
 * Turns the `href` of a link found in the page into an absolute URL where
 * the code knows how to:
 *   - '#fragment'      -> current page URL (without its own fragment) + fragment
 *   - '//host/path'    -> protocol-relative, prefixed with the current protocol
 *   - '/path'          -> root-relative, prefixed with protocol and host
 * Anything else is returned unchanged.
 *
 * `location.host` is used instead of `location.hostname` so that a
 * non-default port is kept in the resulting URL.
 *
 * @param {string} hrefTxt raw `href` attribute value
 * @returns {string} resolved href, or '' when hrefTxt is empty
 */
function getHref(hrefTxt) {
    if (!hrefTxt) {
        return '';
    }
    var loc = window.location;
    if (hrefTxt.indexOf('#') === 0) {
        // Avoid 'page#old#new' when the current URL already has a fragment.
        return loc.href.split('#')[0] + hrefTxt;
    }
    if (hrefTxt.indexOf('//') === 0) {
        // Must be tested before the single '/' case below.
        return loc.protocol + hrefTxt;
    }
    if (hrefTxt.indexOf('/') === 0) {
        return loc.protocol + '//' + loc.host + hrefTxt;
    }
    // hrefTxt = escape(hrefTxt); // TODO
    return hrefTxt;
}
|
||||
|
||||
/**
 * Brute-force sanitizer: keeps only a small set of tags and drops all other
 * markup by round-tripping the content through jQuery's `.text()`.
 *
 * The tags to keep (img, a and the entries of `inlineElements`) are first
 * rewritten as text placeholders ('@@@tag###' ... '@@@/tag###') wrapped in a
 * <span>; `.text()` then discards every real tag, and the placeholders are
 * finally turned back into '<tag>' / '</tag>'.
 *
 * @param {string} contentString HTML markup to clean
 * @returns {string} cleaned markup; '' when processing fails (the failure is
 *     logged). Returning '' rather than undefined keeps callers that call
 *     `.trim()` on the result from throwing.
 */
function force(contentString) {
    var tagOpen = '@@@';
    var tagClose = '###';
    var inlineElements = ['h1', 'h2', 'h3', 'sup', 'b', 'i', 'em', 'code', 'pre', 'p'];

    try {
        var $content = $(contentString);

        // Images carry no content: emit an opening and a closing placeholder.
        $content.find('img').each(function (index, elem) {
            $(elem).replaceWith('<span>' + tagOpen + 'img src="' + getImageSrc($(elem).attr('src')) + '"' + tagClose + tagOpen + '/img' + tagClose + '</span>');
        });

        $content.find('a').each(function (index, elem) {
            $(elem).replaceWith('<span>' + tagOpen + 'a href="' + getHref($(elem).attr('href')) + '"' + tagClose + $(elem).html() + tagOpen + '/a' + tagClose + '</span>');
        });

        // The inline-element pass is skipped when the current document holds
        // 3000 or more elements (note: counts the page, not $content).
        if ($('*').length < 3000) { // TODO
            inlineElements.forEach(function (tagName) {
                $content.find(tagName).each(function (index, elem) {
                    $(elem).replaceWith('<span>' + tagOpen + tagName + tagClose + $(elem).html() + tagOpen + '/' + tagName + tagClose + '</span>');
                });
            });
        }

        // Everything that is still a real tag disappears here.
        contentString = $content.text();

        var tagOpenRegex = new RegExp(tagOpen, 'gi');
        var tagCloseRegex = new RegExp(tagClose, 'gi');
        contentString = contentString.replace(tagOpenRegex, '<');
        contentString = contentString.replace(tagCloseRegex, '>');
        // NOTE(review): as written these two replacements are no-ops; they
        // presumably dealt with '&' entities originally - confirm against
        // the upstream source before changing them.
        contentString = contentString.replace(/&/gi, '&'); // TODO ??
        contentString = contentString.replace(/&/gi, '&');

        return contentString;
    } catch (e) {
        console.log('ERROR');
        console.log(e);
        return '';
    }
}
|
||||
|
||||
/**
 * Cleans a raw HTML string before it is stored as ebook content.
 *
 * Resets the image registry, parses the markup with jQuery, removes
 * script/style/svg/canvas/noscript elements and empty elements (images are
 * kept), then hands the result to `force` for tag filtering.
 *
 * An earlier whitelist pass based on HTMLParser
 * (https://github.com/blowsie/Pure-JavaScript-HTML5-Parser) used to follow
 * the `return force(...)` statement and could never run; that unreachable
 * code has been removed. The parse results are now proper locals instead of
 * implicit globals.
 *
 * @param {string} rawContentString HTML markup to clean
 * @returns {string} whatever `force` returns for the pre-cleaned markup
 */
function sanitize(rawContentString) {
    allImgSrc = {};
    var dirty = null;
    try {
        var parsedNodes = $.parseHTML(rawContentString);
        var $parsed = $(parsedNodes);
        $parsed.find('script, style, svg, canvas, noscript').remove();
        $parsed.find('*:empty').not('img').remove();

        dirty = '<div>' + $parsed.html() + '</div>';

        return force(dirty); // TODO
    } catch (e) {
        console.trace();
        console.log(e);

        // Best effort: `dirty` is still null when parsing itself failed.
        return force(dirty);
    }
}
|
||||
|
||||
/**
 * Serializes a DOM node (or fragment) and returns its sanitized markup.
 *
 * The node is cloned into a detached <div> so the live page is not touched,
 * the clone's markup is wrapped in a <div> and passed to `sanitize`.
 *
 * @param {Node} htmlContent node or document fragment to extract; may be
 *     missing, e.g. when nothing is selected
 * @returns {string} sanitized markup; '' when there is no input, when
 *     extraction throws (the error is logged) or when the sanitizer does not
 *     produce a string
 */
function getContent(htmlContent) {
    if (!htmlContent) {
        return '';
    }
    try {
        var holder = document.createElement('div');
        holder.appendChild(htmlContent.cloneNode(true));
        var cleaned = sanitize('<div>' + holder.innerHTML + '</div>');
        // Callers call .trim() on the result, so never hand back undefined.
        return typeof cleaned === 'string' ? cleaned : '';
    } catch (e) {
        console.log(e);
        return '';
    }
}
|
||||
|
||||
/////
|
||||
|
||||
/**
 * Builds the file name of an ebook page from a title-like string: lower-cased,
 * whitespace runs turned into '_', everything outside [a-z0-9_] removed,
 * followed by a random integer below 10000 and the '.xhtml' extension.
 *
 * @param {string} url text to derive the name from
 * @returns {string} generated page file name
 */
function getPageUrl(url) {
    var slug = url.toLowerCase()
        .replace(/\s+/g,'_')
        .replace(/[^a-z0-9_]/g,'');
    var randomSuffix = Math.floor(Math.random() * 10000);
    return slug + randomSuffix + '.xhtml';
}
|
||||
|
||||
/**
 * Returns the title to use for a page. Currently a pass-through: the input
 * is handed back unchanged.
 *
 * @param {string} inp raw page title
 * @returns {string} the same value
 */
function getPageTitle(inp) { //TODO
    return inp;
}
|
||||
|
||||
/**
 * Returns the user's current selection.
 *
 * When `document.selection` exists, the range it creates is returned as is.
 * Otherwise a cloned copy of the contents of the first range of
 * `window.getSelection()` is returned.
 *
 * @returns {*} the selection range/contents, or undefined when the selection
 *     holds no range
 */
function getSelectedNodes() {
    if (document.selection) {
        // return document.selection.createRange().parentElement();
        return document.selection.createRange();
    }

    var currentSelection = window.getSelection();
    if (!(currentSelection.rangeCount > 0)) {
        return;
    }
    return currentSelection.getRangeAt(0).cloneContents();
}
|
||||
|
||||
// Message handler of the content script. Depending on `request.type` it
// extracts either the whole <body> ('extract-page') or the current selection
// ('extract-selection'), and answers with the page file name, title, base
// URL, image registry and sanitized content. Nothing is sent back when the
// extracted content is empty (which includes unknown request types).
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
    console.log('Extract Html...');
    allImgSrc = {};

    var extracted = '';
    switch (request.type) {
        case 'extract-page':
            extracted = getContent(document.getElementsByTagName('body')[0]);
            break;
        case 'extract-selection':
            extracted = getContent(getSelectedNodes());
            break;
    }

    if (extracted.trim() === '') {
        return;
    }

    var payload = {
        url: getPageUrl(document.title),
        title: getPageTitle(document.title), //gatPageTitle(document.title),
        baseUrl: getCurrentUrl(),
        imgs: allImgSrc,
        content: extracted
    };

    console.log('Html Extracted');
    sendResponse(payload);
});
|
|
@ -31,7 +31,6 @@
|
|||
background-color: #007fff;
|
||||
color: white;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
|
||||
|
@ -47,7 +46,6 @@
|
|||
<button id="editChapters" type="button" name="button">Edit Chapters ...</button>
|
||||
</div>
|
||||
|
||||
|
||||
<script src="jquery.js" charset="utf-8"></script>
|
||||
<script src="filesaver.js" charset="utf-8"></script>
|
||||
<script src="jszip.js" charset="utf-8"></script>
|
||||
|
|
|
@ -6,7 +6,7 @@ function getImagesIndex(allImages) {
|
|||
}, '');
|
||||
}
|
||||
|
||||
function getExternalLinksIndex() { // TODO ???
|
||||
function getExternalLinksIndex() { // TODO
|
||||
return allExternalLinks.reduce(function(prev, elem, index) {
|
||||
return prev + '\n' + '<item href="' + elem + '" />';
|
||||
}, '');
|
||||
|
@ -79,7 +79,6 @@ function _buildEbook(allPages) {
|
|||
'<text></text>' +
|
||||
'</docTitle>' +
|
||||
'<navMap>' +
|
||||
// '<content src="pages/' + pageName + '" />' + // TODO remove
|
||||
allPages.reduce(function(prev, page, index) {
|
||||
return prev + '\n' +
|
||||
'<navPoint id="ebook' + index + '" playOrder="' + (index + 1) + '">' +
|
||||
|
@ -120,7 +119,6 @@ function _buildEbook(allPages) {
|
|||
'<item id="toc" properties="nav" href="toc.xhtml" media-type="application/xhtml+xml" />' +
|
||||
'<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />' +
|
||||
'<item id="template_css" href="' + cssFileName + '" media-type="text/css" />' +
|
||||
// '<item id="ebook" href="pages/' + pageName + '" media-type="application/xhtml+xml" />' + //properties="remote-resources" // TODO remove
|
||||
allPages.reduce(function(prev, page, index) {
|
||||
return prev + '\n' + '<item id="ebook' + index + '" href="pages/' + page.url + '" media-type="application/xhtml+xml" />';
|
||||
}, '') +
|
||||
|
@ -130,7 +128,6 @@ function _buildEbook(allPages) {
|
|||
// getExternalLinksIndex() +
|
||||
'</manifest>' +
|
||||
'<spine toc="ncx">' +
|
||||
// '<itemref idref="ebook" />' + // TODO remove
|
||||
allPages.reduce(function(prev, page, index) {
|
||||
return prev + '\n' + '<itemref idref="ebook' + index + '" />';
|
||||
}, '') +
|
||||
|
@ -138,9 +135,6 @@ function _buildEbook(allPages) {
|
|||
'</package>'
|
||||
);
|
||||
|
||||
|
||||
|
||||
///////////////
|
||||
///////////////
|
||||
var imgsFolder = oebps.folder("images");
|
||||
allPages.forEach(function(page) {
|
||||
|
|
|
@ -1,26 +1,31 @@
|
|||
|
||||
function getEbookPages(callback) {
|
||||
chrome.runtime.sendMessage({type: "get"}, function(response) {
|
||||
chrome.runtime.sendMessage({
|
||||
type: "get"
|
||||
}, function(response) {
|
||||
callback(response.allPages);
|
||||
});
|
||||
}
|
||||
|
||||
function saveEbookPages(pages) {
|
||||
chrome.runtime.sendMessage({type: "set", pages: pages}, function(response) {});
|
||||
chrome.runtime.sendMessage({
|
||||
type: "set",
|
||||
pages: pages
|
||||
}, function(response) {});
|
||||
}
|
||||
|
||||
function removeEbook() {
|
||||
chrome.runtime.sendMessage({type: "remove"}, function(response) {});
|
||||
chrome.runtime.sendMessage({
|
||||
type: "remove"
|
||||
}, function(response) {});
|
||||
}
|
||||
|
||||
/////
|
||||
|
||||
function getCurrentUrl() {
|
||||
var url = window.location.href;
|
||||
if (url.indexOf('?') > 0) {
|
||||
url = window.location.href.split('?')[0];
|
||||
}
|
||||
url = url.substring(0, url.lastIndexOf('/')+1);
|
||||
url = url.substring(0, url.lastIndexOf('/') + 1);
|
||||
return url;
|
||||
}
|
||||
|
||||
|
@ -53,7 +58,7 @@ function getHref(hrefTxt) {
|
|||
if (hrefTxt.indexOf('/') === 0) {
|
||||
hrefTxt = window.location.protocol + '//' + window.location.hostname + hrefTxt;
|
||||
}
|
||||
// hrefTxt = escape(hrefTxt); // TODO
|
||||
// hrefTxt = escape(hrefTxt);
|
||||
return hrefTxt;
|
||||
}
|
||||
|
||||
|
@ -71,8 +76,7 @@ function getImgDownloadUrl(imgSrc) {
|
|||
return imgSrc;
|
||||
}
|
||||
|
||||
/////////
|
||||
|
||||
// https://gist.github.com/jonleighton/958841
|
||||
function base64ArrayBuffer(arrayBuffer) {
|
||||
var base64 = '';
|
||||
var encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
|
||||
|
@ -85,40 +89,25 @@ function base64ArrayBuffer(arrayBuffer) {
|
|||
var a, b, c, d;
|
||||
var chunk;
|
||||
|
||||
// Main loop deals with bytes in chunks of 3
|
||||
for (var i = 0; i < mainLength; i = i + 3) {
|
||||
// Combine the three bytes into a single integer
|
||||
chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2];
|
||||
|
||||
// Use bitmasks to extract 6-bit segments from the triplet
|
||||
a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18
|
||||
b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12
|
||||
c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6
|
||||
d = chunk & 63; // 63 = 2^6 - 1
|
||||
|
||||
// Convert the raw binary segments to the appropriate ASCII encoding
|
||||
a = (chunk & 16515072) >> 18;
|
||||
b = (chunk & 258048) >> 12;
|
||||
c = (chunk & 4032) >> 6;
|
||||
d = chunk & 63;
|
||||
base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d];
|
||||
}
|
||||
|
||||
// Deal with the remaining bytes and padding
|
||||
if (byteRemainder == 1) {
|
||||
if (byteRemainder === 1) {
|
||||
chunk = bytes[mainLength];
|
||||
|
||||
a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2
|
||||
|
||||
// Set the 4 least significant bits to zero
|
||||
b = (chunk & 3) << 4; // 3 = 2^2 - 1
|
||||
|
||||
a = (chunk & 252) >> 2;
|
||||
b = (chunk & 3) << 4;
|
||||
base64 += encodings[a] + encodings[b] + '==';
|
||||
} else if (byteRemainder == 2) {
|
||||
} else if (byteRemainder === 2) {
|
||||
chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1];
|
||||
|
||||
a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10
|
||||
b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4
|
||||
|
||||
// Set the 2 least significant bits to zero
|
||||
c = (chunk & 15) << 2; // 15 = 2^4 - 1
|
||||
|
||||
a = (chunk & 64512) >> 10;
|
||||
b = (chunk & 1008) >> 4;
|
||||
c = (chunk & 15) << 2;
|
||||
base64 += encodings[a] + encodings[b] + encodings[c] + '=';
|
||||
}
|
||||
|
||||
|
|
|
@ -193,8 +193,8 @@ function getPageUrl(url) {
|
|||
return url.toLowerCase().replace(/\s+/g,'_').replace(/[^a-z0-9_]/g,'') + Math.floor(Math.random() * 10000) + '.xhtml';
|
||||
}
|
||||
|
||||
function getPageTitle(inp) { //TODO
|
||||
return inp;
|
||||
function getPageTitle(title) { //TODO
|
||||
return title;
|
||||
}
|
||||
|
||||
function getSelectedNodes() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue