Java by API/javax.swing.text.html/HTML
Содержание
HTML.Attribute.HREF
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Demonstrates {@code HTML.Attribute.HREF}: loads a web page into an
 * {@link HTMLDocument} and prints the {@code href} attribute and the text of
 * every anchor element found in it.
 */
public class MainClass {

  /**
   * Fetches the page, parses it, and prints one line per anchor in the form
   * {@code <href> - <anchor text>}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed, or read back
   *     from the document
   */
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://www.jexp.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // try-with-resources closes the network stream and its readers even when
    // parsing fails part-way through.
    try (InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr)) {
      // Third argument: ignore any charset directive found in the page.
      parser.parse(br, callback, true);
    }
    // The reader buffers parsed content; flush() must be the last call on it
    // so that pending changes are actually written into the document.
    callback.flush();

    for (HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);
        iterator.isValid();
        iterator.next()) {
      AttributeSet attributes = iterator.getAttributes();
      // Null when the anchor carries no href; print() then writes "null".
      String href = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(href);

      int startOffset = iterator.getStartOffset();
      int endOffset = iterator.getEndOffset();
      String text = htmlDoc.getText(startOffset, endOffset - startOffset);
      System.out.println(" - " + text);
    }
  }
}
</source>
HTML: getAllAttributeKeys()
<source lang="java">
import javax.swing.text.html.HTML;

/**
 * Lists every HTML tag and every HTML attribute key known to
 * {@link HTML}, as comma-separated rows on standard output.
 */
public class Main {

  /** Number of entries printed before the row is broken with a newline. */
  private static final int ENTRIES_PER_ROW = 8;

  /**
   * Prints the "HTML Tags:" section followed by the "HTML Attributes:" section.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    HTML.Tag[] tags = HTML.getAllTags();
    HTML.Attribute[] attrs = HTML.getAllAttributeKeys();

    System.out.println("HTML Tags:");
    printRows(tags);

    System.out.println("\n\nHTML Attributes:");
    printRows(attrs);
  }

  /**
   * Prints all entries but the last followed by ", ", breaking the line after
   * every eighth one, then prints the last entry on a line by itself
   * terminated with a newline.
   *
   * @param entries the values to print
   */
  private static void printRows(Object[] entries) {
    int lastIndex = entries.length - 1;
    int inRow = 0;
    for (int index = 0; index < lastIndex; index++) {
      System.out.print(entries[index] + ", ");
      inRow++;
      if (inRow == ENTRIES_PER_ROW) {
        System.out.println("");
        inRow = 0;
      }
    }
    System.out.println(entries[lastIndex]);
  }
}
</source>
HTML.Tag
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Demonstrates {@code HTML.Tag}: loads a web page into an
 * {@link HTMLDocument}, walks its element tree, and identifies elements by
 * comparing their name attribute against {@code HTML.Tag} constants.
 */
public class MainClass {

  /**
   * Fetches the page, parses it, and prints the text of every {@code H1}
   * element as {@code h1: <text>}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed, or read back
   *     from the document
   */
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://www.jexp.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // try-with-resources closes the network stream and its readers even when
    // parsing fails part-way through.
    try (InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr)) {
      // Third argument: ignore any charset directive found in the page.
      parser.parse(br, callback, true);
    }
    // The reader buffers parsed content; flush() must be the last call on it
    // so that pending changes are actually written into the document.
    callback.flush();

    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);
      // HTML.Tag constants are compared by identity; a null or non-tag name
      // simply fails the comparison, so no separate instanceof test is needed.
      if (name != HTML.Tag.H1) {
        continue;
      }

      // Collect the text of the heading's direct CONTENT children only.
      StringBuilder text = new StringBuilder();
      int count = element.getElementCount();
      for (int i = 0; i < count; i++) {
        Element child = element.getElement(i);
        AttributeSet childAttributes = child.getAttributes();
        if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
          int startOffset = child.getStartOffset();
          int endOffset = child.getEndOffset();
          text.append(htmlDoc.getText(startOffset, endOffset - startOffset));
        }
      }
      System.out.println(name + ": " + text);
    }
  }
}
</source>
HTML.Tag.A
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Demonstrates {@code HTML.Tag.A}: loads a web page into an
 * {@link HTMLDocument} and iterates over its anchor elements with
 * {@code HTMLDocument.getIterator(HTML.Tag.A)}.
 */
public class MainClass {

  /**
   * Fetches the page, parses it, and prints one line per anchor in the form
   * {@code <href> - <anchor text>}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed, or read back
   *     from the document
   */
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://www.jexp.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // Release the connection's stream and both readers as soon as parsing is
    // done, including when parse() throws.
    try (InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr)) {
      // Third argument: ignore any charset directive found in the page.
      parser.parse(br, callback, true);
    }
    // flush() is the final call required on the document's reader; without it
    // content still pending in the reader is not written into the document.
    callback.flush();

    HTMLDocument.Iterator anchors = htmlDoc.getIterator(HTML.Tag.A);
    while (anchors.isValid()) {
      AttributeSet attributes = anchors.getAttributes();
      // Null when the anchor carries no href; print() then writes "null".
      String href = (String) attributes.getAttribute(HTML.Attribute.HREF);
      System.out.print(href);

      int startOffset = anchors.getStartOffset();
      int endOffset = anchors.getEndOffset();
      String text = htmlDoc.getText(startOffset, endOffset - startOffset);
      System.out.println(" - " + text);

      anchors.next();
    }
  }
}
</source>
HTML.Tag.CONTENT
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Demonstrates {@code HTML.Tag.CONTENT}: loads a web page into an
 * {@link HTMLDocument} and gathers the text of the {@code CONTENT} children
 * of each {@code H1} element.
 */
public class MainClass {

  /**
   * Fetches the page, parses it, and prints the text of every {@code H1}
   * element as {@code h1: <text>}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed, or read back
   *     from the document
   */
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://www.jexp.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // Release the connection's stream and both readers as soon as parsing is
    // done, including when parse() throws.
    try (InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr)) {
      // Third argument: ignore any charset directive found in the page.
      parser.parse(br, callback, true);
    }
    // flush() is the final call required on the document's reader; without it
    // content still pending in the reader is not written into the document.
    callback.flush();

    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);
      // Identity comparison is enough: a null or non-tag name never equals
      // the H1 constant.
      if (name != HTML.Tag.H1) {
        continue;
      }

      StringBuilder text = new StringBuilder();
      int count = element.getElementCount();
      for (int i = 0; i < count; i++) {
        Element child = element.getElement(i);
        AttributeSet childAttributes = child.getAttributes();
        // Only direct children named CONTENT contribute text; any other
        // child element of the heading is skipped.
        if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
          int startOffset = child.getStartOffset();
          int endOffset = child.getEndOffset();
          text.append(htmlDoc.getText(startOffset, endOffset - startOffset));
        }
      }
      System.out.println(name + ": " + text);
    }
  }
}
</source>
HTML: Tag[] getAllTags()
<source lang="java">
import javax.swing.text.html.HTML;

/**
 * Demonstrates {@code HTML.getAllTags()}: prints every tag returned, one per
 * line, prefixed with its 1-based position in the returned array.
 */
public class Main {

  /**
   * Prints each tag as {@code <position>: <tag>}.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    HTML.Tag[] list = HTML.getAllTags();
    for (int i = 0; i < list.length; i++) {
      System.out.println((i + 1) + ": " + list[i]);
    }
  }
}
/*
Sample output recorded with the original example (each entry is printed on its
own line; wrapped here for brevity):

1: a 2: address 3: applet 4: area 5: b 6: base 7: basefont 8: big
9: blockquote 10: body 11: br 12: caption 13: center 14: cite 15: code 16: dd
17: dfn 18: dir 19: div 20: dl 21: dt 22: em 23: font 24: form
25: frame 26: frameset 27: h1 28: h2 29: h3 30: h4 31: h5 32: h6
33: head 34: hr 35: html 36: i 37: img 38: input 39: isindex 40: kbd
41: li 42: link 43: map 44: menu 45: meta 46: nobr 47: noframes 48: object
49: ol 50: option 51: p 52: param 53: pre 54: samp 55: script 56: select
57: small 58: span 59: strike 60: s 61: strong 62: style 63: sub 64: sup
65: table 66: td 67: textarea 68: th 69: title 70: tr 71: tt 72: u
73: ul 74: var
*/
</source>
HTML.Tag.H1
<source lang="java">
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import javax.swing.text.AttributeSet;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.StyleConstants;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Demonstrates {@code HTML.Tag.H1}: loads a web page into an
 * {@link HTMLDocument} and prints the text of each top-level heading found
 * while walking the document's element tree.
 */
public class MainClass {

  /**
   * Fetches the page, parses it, and prints the text of every {@code H1}
   * element as {@code h1: <text>}.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if the page cannot be fetched, parsed, or read back
   *     from the document
   */
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://www.jexp.ru");
    URLConnection connection = url.openConnection();

    HTMLEditorKit htmlKit = new HTMLEditorKit();
    HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
    HTMLEditorKit.Parser parser = new ParserDelegator();
    HTMLEditorKit.ParserCallback callback = htmlDoc.getReader(0);

    // The stream and readers are closed automatically once parsing finishes
    // or fails.
    try (InputStream is = connection.getInputStream();
        InputStreamReader isr = new InputStreamReader(is);
        BufferedReader br = new BufferedReader(isr)) {
      // Third argument: ignore any charset directive found in the page.
      parser.parse(br, callback, true);
    }
    // Push whatever the reader still has pending into the document; flush()
    // is documented as the last method to call on the reader.
    callback.flush();

    ElementIterator iterator = new ElementIterator(htmlDoc);
    Element element;
    while ((element = iterator.next()) != null) {
      AttributeSet attributes = element.getAttributes();
      Object name = attributes.getAttribute(StyleConstants.NameAttribute);
      // Tag constants are compared by identity, which also rejects null and
      // non-tag names without a separate instanceof test.
      if (name != HTML.Tag.H1) {
        continue;
      }

      // Concatenate the text covered by the heading's direct CONTENT children.
      StringBuilder text = new StringBuilder();
      int count = element.getElementCount();
      for (int i = 0; i < count; i++) {
        Element child = element.getElement(i);
        AttributeSet childAttributes = child.getAttributes();
        if (childAttributes.getAttribute(StyleConstants.NameAttribute) == HTML.Tag.CONTENT) {
          int startOffset = child.getStartOffset();
          int endOffset = child.getEndOffset();
          text.append(htmlDoc.getText(startOffset, endOffset - startOffset));
        }
      }
      System.out.println(name + ": " + text);
    }
  }
}
</source>