Java by API/javax.swing.text.html/HTMLEditorKit
Содержание
extends HTMLEditorKit
<source lang="java">
import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.Enumeration; import javax.swing.text.AttributeSet; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; public class Main {
public static void main(String[] args) throws Exception { ParserGetter kit = new ParserGetter(); HTMLEditorKit.Parser parser = kit.getParser(); HTMLEditorKit.ParserCallback callback = new ReportAttributes(); URL u = new URL("http://www.jexp.ru"); InputStream in = u.openStream(); InputStreamReader r = new InputStreamReader(in); parser.parse(r, callback, false); }
} class ReportAttributes extends HTMLEditorKit.ParserCallback {
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) { this.listAttributes(attributes); } private void listAttributes(AttributeSet attributes) { Enumeration e = attributes.getAttributeNames(); while (e.hasMoreElements()) { Object name = e.nextElement(); Object value = attributes.getAttribute(name); if (!attributes.containsAttribute(name.toString(), value)) { System.out.println("containsAttribute() fails"); } if (!attributes.isDefined(name.toString())) { System.out.println("isDefined() fails"); } System.out.println(name + "=" + value); } } public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) { this.listAttributes(attributes); }
} class ParserGetter extends HTMLEditorKit {
public HTMLEditorKit.Parser getParser() { return super.getParser(); }
}
</source>
extends HTMLEditorKit.ParserCallback
<source lang="java">
import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.Enumeration; import javax.swing.text.AttributeSet; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; public class Main {
public static void main(String[] args) throws Exception { ParserGetter kit = new ParserGetter(); HTMLEditorKit.Parser parser = kit.getParser(); HTMLEditorKit.ParserCallback callback = new ReportAttributes(); URL u = new URL("http://www.jexp.ru"); InputStream in = u.openStream(); InputStreamReader r = new InputStreamReader(in); parser.parse(r, callback, false); }
} class ReportAttributes extends HTMLEditorKit.ParserCallback {
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) { this.listAttributes(attributes); } private void listAttributes(AttributeSet attributes) { Enumeration e = attributes.getAttributeNames(); while (e.hasMoreElements()) { Object name = e.nextElement(); Object value = attributes.getAttribute(name); if (!attributes.containsAttribute(name.toString(), value)) { System.out.println("containsAttribute() fails"); } if (!attributes.isDefined(name.toString())) { System.out.println("isDefined() fails"); } System.out.println(name + "=" + value); } } public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) { this.listAttributes(attributes); }
} class ParserGetter extends HTMLEditorKit {
public HTMLEditorKit.Parser getParser() { return super.getParser(); }
}
</source>
HTMLEditorKit: createDefaultDocument()
<source lang="java">
import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import javax.swing.text.AttributeSet; import javax.swing.text.Element; import javax.swing.text.ElementIterator; import javax.swing.text.StyleConstants; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLDocument; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; public class MainClass {
public static void main(String args[]) throws Exception { URL url = new URL("http://www.jexp.ru"); URLConnection connection = url.openConnection(); InputStream is = connection.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); HTMLEditorKit htmlKit = new HTMLEditorKit(); HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument(); HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0); parser.parse(br, callback, true); ElementIterator iterator = new ElementIterator(htmlDoc); Element element; while ((element = iterator.next()) != null) { AttributeSet attributes = element.getAttributes(); Object name = attributes.getAttribute(StyleConstants.NameAttribute); if ((name instanceof HTML.Tag) && (name == HTML.Tag.H1)) { StringBuffer text = new StringBuffer(); int count = element.getElementCount(); for (int i = 0; i < count; i++) { Element child = element.getElement(i); AttributeSet childAttributes = child.getAttributes(); if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) { int startOffset = child.getStartOffset(); int endOffset = child.getEndOffset(); int length = endOffset - startOffset; text.append(htmlDoc.getText(startOffset, length)); } } System.out.println(name + ": " + text.toString()); } } }
}
</source>
HTMLEditorKit.Parser: parse(Reader r, ParserCallback cb, boolean i)
<source lang="java">
import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import javax.swing.text.AttributeSet; import javax.swing.text.Element; import javax.swing.text.ElementIterator; import javax.swing.text.StyleConstants; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLDocument; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; public class MainClass {
public static void main(String args[]) throws Exception { URL url = new URL("http://www.jexp.ru"); URLConnection connection = url.openConnection(); InputStream is = connection.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); HTMLEditorKit htmlKit = new HTMLEditorKit(); HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument(); HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0); parser.parse(br, callback, true); ElementIterator iterator = new ElementIterator(htmlDoc); Element element; while ((element = iterator.next()) != null) { AttributeSet attributes = element.getAttributes(); Object name = attributes.getAttribute(StyleConstants.NameAttribute); if ((name instanceof HTML.Tag) && (name == HTML.Tag.H1)) { StringBuffer text = new StringBuffer(); int count = element.getElementCount(); for (int i = 0; i < count; i++) { Element child = element.getElement(i); AttributeSet childAttributes = child.getAttributes(); if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) { int startOffset = child.getStartOffset(); int endOffset = child.getEndOffset(); int length = endOffset - startOffset; text.append(htmlDoc.getText(startOffset, length)); } } System.out.println(name + ": " + text.toString()); } } }
}
</source>
new HTMLEditorKit()
<source lang="java">
import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import javax.swing.text.AttributeSet; import javax.swing.text.Element; import javax.swing.text.ElementIterator; import javax.swing.text.StyleConstants; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLDocument; import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; public class MainClass {
public static void main(String args[]) throws Exception { URL url = new URL("http://www.jexp.ru"); URLConnection connection = url.openConnection(); InputStream is = connection.getInputStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); HTMLEditorKit htmlKit = new HTMLEditorKit(); HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument(); HTMLEditorKit.Parser parser = new ParserDelegator(); HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0); parser.parse(br, callback, true); ElementIterator iterator = new ElementIterator(htmlDoc); Element element; while ((element = iterator.next()) != null) { AttributeSet attributes = element.getAttributes(); Object name = attributes.getAttribute(StyleConstants.NameAttribute); if ((name instanceof HTML.Tag) && (name == HTML.Tag.H1)) { StringBuffer text = new StringBuffer(); int count = element.getElementCount(); for (int i = 0; i < count; i++) { Element child = element.getElement(i); AttributeSet childAttributes = child.getAttributes(); if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) { int startOffset = child.getStartOffset(); int endOffset = child.getEndOffset(); int length = endOffset - startOffset; text.append(htmlDoc.getText(startOffset, length)); } } System.out.println(name + ": " + text.toString()); } } }
}
</source>