/*******************************************************************************
* Copyright (c) 2010, Luca Conte @ BERT Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* · Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* · Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* · Neither the name of Adobe Systems Incorporated nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
package org.bert.ebooks.processors;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.commons.lang.StringUtils;
import org.bert.ebooks.BlogEPUBlisher;
import org.bert.ebooks.BlogEntryProcessor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.adobe.dp.epub.ncx.TOCEntry;
import com.adobe.dp.epub.opf.OPSResource;
import com.adobe.dp.epub.ops.OPSDocument;
/**
 * Selector-driven {@link BlogEntryProcessor}: downloads a blog page, picks the
 * interesting fragments out of it with configurable jsoup CSS selectors and
 * appends them to the chapter document of an EPUB.
 *
 * <p>The contents, title and body-paragraph selectors are always used. The
 * sub-title, meta-info and comment related selectors are optional: a blank
 * (null, empty or whitespace-only) selector simply disables that piece of
 * output.</p>
 */
public class GenericProcessor implements BlogEntryProcessor {

    /** Timeout handed to {@code Jsoup.parse(URL, int)} when fetching a page. */
    private static final int FETCH_TIMEOUT_MILLIS = 60000;

    /** Whether images are processed; forwarded to {@code BlogEPUBlisher.processContents}. */
    private boolean processImages;

    /** Selector applied to the whole page to find the post container. */
    private String contentsSelector;
    /** Selector for the post title, applied inside the post container. */
    private String titleSelector;
    /** Optional selector for the post sub-title, applied inside the post container. */
    private String subTitleSelector;
    /** Optional selector for the post meta information (author, date, etc.). */
    private String metaInfoSelector;
    /** Selector for the post body paragraphs, applied inside the post container. */
    private String bodyParagraphsSelector;

    /** Optional selector applied to the whole page to find the comments. */
    private String commentSelector;
    /** Optional selector for the author, applied inside a single comment. */
    private String commentAuthorSelector;
    /** Optional selector for the meta information, applied inside a single comment. */
    private String commentMetaInfoSelector;
    /** Optional selector for the body paragraphs, applied inside a single comment. */
    private String commentBodyParagraphsSelector;

    /**
     * Fetches {@code url} and appends the post (title, source URL, optional
     * sub-title and meta information, body paragraphs and optional comments)
     * to the document of {@code post}. A table-of-contents entry named after
     * the post title is added under {@code rootTOCEntry}; when comments are
     * present a child entry pointing at the comments heading is added too.
     *
     * @param publisher    publisher providing the EPUB publication and its table of contents
     * @param post         resource whose document receives the generated elements
     * @param rootTOCEntry table-of-contents entry the new chapter entry is attached to
     * @param url          address of the blog page to fetch
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     * @throws IOException           if the page cannot be fetched or processed
     */
    @Override
    public void processURL(BlogEPUBlisher publisher, OPSResource post,
            TOCEntry rootTOCEntry, String url)
            throws MalformedURLException, IOException {
        Document doc = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);
        Elements contents = getContentsElements(doc);
        // The title is needed both for the TOC entry and for the heading: query it once.
        Elements titleElements = getTitleElements(contents);

        // chapter document
        OPSDocument mainDoc = post.getDocument();

        // add the chapter to the table of contents
        TOCEntry mainTOCEntry = publisher.getToc().createTOCEntry(
                titleElements.text(), mainDoc.getRootXRef());
        rootTOCEntry.add(mainTOCEntry);

        // chapter XHTML body element
        com.adobe.dp.epub.ops.Element body = mainDoc.getBody();

        // title
        appendFirstMatch(publisher, mainDoc, body, "h1", titleElements);

        // address the post was taken from
        com.adobe.dp.epub.ops.Element source = mainDoc.createElement("h4");
        source.add("( " + url + " )");
        body.add(source);

        if (StringUtils.isNotBlank(getSubTitleSelector())) {
            appendFirstMatch(publisher, mainDoc, body, "h3", getSubTitleElements(contents));
        }

        // meta: author, date, etc.
        if (StringUtils.isNotBlank(getMetaInfoSelector())) {
            appendFirstMatch(publisher, mainDoc, body, "h2", getMetaInfoElements(contents));
        }

        // real content: one paragraph per matched element
        for (Element element : getBodyParagraphsElements(contents)) {
            appendParagraph(publisher, mainDoc, body, element);
        }

        if (StringUtils.isNotBlank(getCommentSelector())) {
            appendComments(publisher, mainDoc, body, mainTOCEntry, doc);
        }
    }

    /**
     * Creates a {@code tagName} element, lets the publisher fill it from the
     * first element of {@code matches} and appends it to {@code body}.
     */
    private void appendFirstMatch(BlogEPUBlisher publisher, OPSDocument mainDoc,
            com.adobe.dp.epub.ops.Element body, String tagName, Elements matches)
            throws IOException {
        com.adobe.dp.epub.ops.Element target = mainDoc.createElement(tagName);
        BlogEPUBlisher.processContents(publisher.getEpub(), mainDoc,
                BlogEPUBlisher.getFirstElement(matches), target, isProcessImages());
        body.add(target);
    }

    /** Creates a {@code p} element filled from {@code source} and appends it to {@code body}. */
    private void appendParagraph(BlogEPUBlisher publisher, OPSDocument mainDoc,
            com.adobe.dp.epub.ops.Element body, Element source) throws IOException {
        com.adobe.dp.epub.ops.Element paragraph = mainDoc.createElement("p");
        BlogEPUBlisher.processContents(publisher.getEpub(), mainDoc, source, paragraph,
                isProcessImages());
        body.add(paragraph);
    }

    /**
     * Appends the comments section (heading, TOC entry and one block per
     * comment). Nothing is emitted when the comment selector matches nothing.
     */
    private void appendComments(BlogEPUBlisher publisher, OPSDocument mainDoc,
            com.adobe.dp.epub.ops.Element body, TOCEntry mainTOCEntry, Document doc)
            throws IOException {
        Elements comments = getCommentElements(doc);
        if (comments.size() == 0) {
            return;
        }

        // Section heading; the labels are user-visible text and are kept as they are.
        com.adobe.dp.epub.ops.Element heading = mainDoc.createElement("h2");
        heading.add("COMMENTI");
        body.add(heading);
        mainTOCEntry.add(publisher.getToc().createTOCEntry("Commenti", heading.getSelfRef()));

        for (Element comment : comments) {
            appendComment(publisher, mainDoc, body, comment);
        }
    }

    /**
     * Appends a single comment: a separator, then author, meta information and
     * body paragraphs. Each part is skipped when its selector is blank, the
     * same way the optional post selectors are treated, so that a partially
     * configured processor does not fail on a null or empty CSS query.
     */
    private void appendComment(BlogEPUBlisher publisher, OPSDocument mainDoc,
            com.adobe.dp.epub.ops.Element body, Element comment) throws IOException {
        body.add(mainDoc.createElement("hr"));

        if (StringUtils.isNotBlank(getCommentAuthorSelector())) {
            appendFirstMatch(publisher, mainDoc, body, "h3", getCommentAuthorElements(comment));
        }
        if (StringUtils.isNotBlank(getCommentMetaInfoSelector())) {
            appendFirstMatch(publisher, mainDoc, body, "h4", getCommentMetaInfoElements(comment));
        }
        if (StringUtils.isNotBlank(getCommentBodyParagraphsSelector())) {
            for (Element paragraph : getCommentBodyParagraphsElements(comment)) {
                appendParagraph(publisher, mainDoc, body, paragraph);
            }
        }
    }

    /** Selects the post container(s) from the whole page. */
    protected Elements getContentsElements(Document doc) {
        return doc.select(getContentsSelector());
    }

    /** Selects the post body paragraphs inside the post container. */
    protected Elements getBodyParagraphsElements(Elements e) {
        return e.select(getBodyParagraphsSelector());
    }

    /** Selects the post title inside the post container. */
    protected Elements getTitleElements(Elements e) {
        return e.select(getTitleSelector());
    }

    /** Selects the post sub-title inside the post container. */
    protected Elements getSubTitleElements(Elements e) {
        return e.select(getSubTitleSelector());
    }

    /** Selects the post meta information inside the post container. */
    protected Elements getMetaInfoElements(Elements e) {
        return e.select(getMetaInfoSelector());
    }

    /** Selects the comments from the whole page. */
    protected Elements getCommentElements(Document e) {
        return e.select(getCommentSelector());
    }

    /** Selects the author inside a single comment. */
    protected Elements getCommentAuthorElements(Element e) {
        return e.select(getCommentAuthorSelector());
    }

    /** Selects the meta information inside a single comment. */
    protected Elements getCommentMetaInfoElements(Element e) {
        return e.select(getCommentMetaInfoSelector());
    }

    /** Selects the body paragraphs inside a single comment. */
    protected Elements getCommentBodyParagraphsElements(Element e) {
        return e.select(getCommentBodyParagraphsSelector());
    }

    /** Sets the flag forwarded to the publisher when contents are processed. */
    public void setProcessImages(boolean processImages) {
        this.processImages = processImages;
    }

    /** Returns the flag forwarded to the publisher when contents are processed. */
    @Override
    public boolean isProcessImages() {
        return processImages;
    }

    /** Returns the selector used to find the post container. */
    public String getContentsSelector() {
        return contentsSelector;
    }

    /** Sets the selector used to find the post container. */
    public void setContentsSelector(String contentsSelector) {
        this.contentsSelector = contentsSelector;
    }

    /** Returns the selector used to find the post title. */
    public String getTitleSelector() {
        return titleSelector;
    }

    /** Sets the selector used to find the post title. */
    public void setTitleSelector(String titleSelector) {
        this.titleSelector = titleSelector;
    }

    /** Returns the optional selector used to find the post sub-title. */
    public String getSubTitleSelector() {
        return subTitleSelector;
    }

    /** Sets the optional selector used to find the post sub-title; blank disables it. */
    public void setSubTitleSelector(String subTitleSelector) {
        this.subTitleSelector = subTitleSelector;
    }

    /** Returns the optional selector used to find the post meta information. */
    public String getMetaInfoSelector() {
        return metaInfoSelector;
    }

    /** Sets the optional selector used to find the post meta information; blank disables it. */
    public void setMetaInfoSelector(String metaInfoSelector) {
        this.metaInfoSelector = metaInfoSelector;
    }

    /** Returns the optional selector used to find the comments. */
    public String getCommentSelector() {
        return commentSelector;
    }

    /** Sets the optional selector used to find the comments; blank disables comment output. */
    public void setCommentSelector(String commentSelector) {
        this.commentSelector = commentSelector;
    }

    /** Returns the optional selector used to find a comment's author. */
    public String getCommentAuthorSelector() {
        return commentAuthorSelector;
    }

    /** Sets the optional selector used to find a comment's author; blank disables it. */
    public void setCommentAuthorSelector(String commentAuthorSelector) {
        this.commentAuthorSelector = commentAuthorSelector;
    }

    /** Returns the optional selector used to find a comment's meta information. */
    public String getCommentMetaInfoSelector() {
        return commentMetaInfoSelector;
    }

    /** Sets the optional selector used to find a comment's meta information; blank disables it. */
    public void setCommentMetaInfoSelector(String commentMetaInfoSelector) {
        this.commentMetaInfoSelector = commentMetaInfoSelector;
    }

    /** Returns the selector used to find the post body paragraphs. */
    public String getBodyParagraphsSelector() {
        return bodyParagraphsSelector;
    }

    /** Sets the selector used to find the post body paragraphs. */
    public void setBodyParagraphsSelector(String bodyParagraphsSelector) {
        this.bodyParagraphsSelector = bodyParagraphsSelector;
    }

    /** Returns the optional selector used to find a comment's body paragraphs. */
    public String getCommentBodyParagraphsSelector() {
        return commentBodyParagraphsSelector;
    }

    /** Sets the optional selector used to find a comment's body paragraphs; blank disables it. */
    public void setCommentBodyParagraphsSelector(
            String commentBodyParagraphsSelector) {
        this.commentBodyParagraphsSelector = commentBodyParagraphsSelector;
    }
}