Java Tutorial/PDF/HTML Parser
HtmlParser from iText
<source lang="java">
import java.io.FileOutputStream; import com.lowagie.text.Document; import com.lowagie.text.html.HtmlParser; import com.lowagie.text.pdf.PdfWriter; public class MainClass {
/**
 * Entry point: attaches a PDF writer targeting "html1.pdf" to a fresh
 * document, then passes that document and "example.html" to HtmlParser.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if creating the output file, the writer, or the parse fails
 */
public static void main(String[] args) throws Exception {
    final Document pdf = new Document();
    final FileOutputStream target = new FileOutputStream("html1.pdf");
    PdfWriter.getInstance(pdf, target);

    // NOTE(review): there is no explicit open()/close() on the document here;
    // presumably HtmlParser.parse drives the document lifecycle itself.
    // Confirm against the iText HtmlParser documentation.
    HtmlParser.parse(pdf, "example.html");
}
}</source>
Parsing HTML
<source lang="java">
import java.io.FileOutputStream; import java.io.FileReader; import java.util.ArrayList; import com.lowagie.text.Document; import com.lowagie.text.Element; import com.lowagie.text.html.simpleparser.HTMLWorker; import com.lowagie.text.html.simpleparser.StyleSheet; import com.lowagie.text.pdf.PdfWriter; public class MainClass {
/**
 * Entry point: parses "example.html" into a list of elements with
 * HTMLWorker and adds each of them, in order, to a document written to
 * "html2.pdf".
 *
 * @param args command-line arguments (unused)
 * @throws Exception if any file cannot be opened, or parsing or writing fails
 */
public static void main(String[] args) throws Exception {
    Document document = new Document();
    StyleSheet st = new StyleSheet();
    st.loadTagStyle("body", "leading", "16,0");
    PdfWriter.getInstance(document, new FileOutputStream("html2.pdf"));
    document.open();

    // The reader is only needed while the HTML is being parsed, so release
    // the file handle as soon as parsing ends, even if it throws.
    // NOTE(review): FileReader(String) decodes with the platform default
    // charset on older JDKs; confirm that matches the encoding of the input.
    ArrayList p;
    FileReader reader = new FileReader("example.html");
    try {
        p = HTMLWorker.parseToList(reader, st);
    } finally {
        reader.close();
    }

    for (int k = 0; k < p.size(); ++k) {
        document.add((Element) p.get(k));
    }
    document.close();
}
}</source>
Parsing HTML Snippets
<source lang="java">
import java.io.FileOutputStream; import java.io.FileReader; import java.util.ArrayList; import com.lowagie.text.Document; import com.lowagie.text.Element; import com.lowagie.text.html.simpleparser.HTMLWorker; import com.lowagie.text.html.simpleparser.StyleSheet; import com.lowagie.text.pdf.PdfWriter; public class MainClass {
/**
 * Entry point: parses the HTML in "data.html" into a list of elements with
 * HTMLWorker and adds each of them, in order, to a document written to
 * "html3.pdf".
 *
 * @param args command-line arguments (unused)
 * @throws Exception if any file cannot be opened, or parsing or writing fails
 */
public static void main(String[] args) throws Exception {
    Document document = new Document();
    StyleSheet styles = new StyleSheet();
    styles.loadTagStyle("ol", "leading", "16,0");
    PdfWriter.getInstance(document, new FileOutputStream("html3.pdf"));
    document.open();

    // Close the input file as soon as parsing ends (normally or with an
    // exception); the original never closed this reader.
    // NOTE(review): FileReader(String) decodes with the platform default
    // charset on older JDKs; confirm that matches the encoding of the input.
    ArrayList objects;
    FileReader reader = new FileReader("data.html");
    try {
        objects = HTMLWorker.parseToList(reader, styles);
    } finally {
        reader.close();
    }

    for (int k = 0; k < objects.size(); ++k) {
        document.add((Element) objects.get(k));
    }
    document.close();
}
}</source>