<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ru">
		<id>http://www.jexp.ru/index.php?action=history&amp;feed=atom&amp;title=Java_Tutorial%2FNetwork%2FHTML_Parser</id>
		<title>Java Tutorial/Network/HTML Parser - История изменений</title>
		<link rel="self" type="application/atom+xml" href="http://www.jexp.ru/index.php?action=history&amp;feed=atom&amp;title=Java_Tutorial%2FNetwork%2FHTML_Parser"/>
		<link rel="alternate" type="text/html" href="http://www.jexp.ru/index.php?title=Java_Tutorial/Network/HTML_Parser&amp;action=history"/>
		<updated>2026-04-21T22:31:13Z</updated>
		<subtitle>История изменений этой страницы в вики</subtitle>
		<generator>MediaWiki 1.30.0</generator>

	<entry>
		<id>http://www.jexp.ru/index.php?title=Java_Tutorial/Network/HTML_Parser&amp;diff=4616&amp;oldid=prev</id>
		<title>Admin: 1 версия</title>
		<link rel="alternate" type="text/html" href="http://www.jexp.ru/index.php?title=Java_Tutorial/Network/HTML_Parser&amp;diff=4616&amp;oldid=prev"/>
				<updated>2010-06-01T05:03:39Z</updated>
		
		<summary type="html">&lt;p&gt;1 версия&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;tr style=&quot;vertical-align: top;&quot; lang=&quot;ru&quot;&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: white; color:black; text-align: center;&quot;&gt;← Предыдущая&lt;/td&gt;
				&lt;td colspan=&quot;1&quot; style=&quot;background-color: white; color:black; text-align: center;&quot;&gt;Версия 05:03, 1 июня 2010&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; style=&quot;text-align: center;&quot; lang=&quot;ru&quot;&gt;&lt;div class=&quot;mw-diff-empty&quot;&gt;(нет различий)&lt;/div&gt;
&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;</summary>
		<author><name>Admin</name></author>	</entry>

	<entry>
		<id>http://www.jexp.ru/index.php?title=Java_Tutorial/Network/HTML_Parser&amp;diff=4615&amp;oldid=prev</id>
		<title> в 17:44, 31 мая 2010</title>
		<link rel="alternate" type="text/html" href="http://www.jexp.ru/index.php?title=Java_Tutorial/Network/HTML_Parser&amp;diff=4615&amp;oldid=prev"/>
				<updated>2010-05-31T17:44:27Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Новая страница&lt;/b&gt;&lt;/p&gt;&lt;div&gt;==  Escape HTML special characters from a String ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String[] argv){&lt;br /&gt;
    System.out.println(escapeHTML(&amp;quot;&amp;gt;&amp;lt;&amp;quot;));&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public static final String escapeHTML(String s) {&lt;br /&gt;
    StringBuffer sb = new StringBuffer();&lt;br /&gt;
    int n = s.length();&lt;br /&gt;
    for (int i = 0; i &amp;lt; n; i++) {&lt;br /&gt;
      char c = s.charAt(i);&lt;br /&gt;
      switch (c) {&lt;br /&gt;
      case &amp;quot;&amp;lt;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;lt;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;gt;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;gt;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;amp;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case '&amp;quot;':&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;quot;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;agrave;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;agrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;À&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Agrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;acirc;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;acirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Â&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Acirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;auml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;auml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Auml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Auml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;aring;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;aring;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Aring;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Aring;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;aelig;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;aelig;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;AElig;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;AElig;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ccedil;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ccedil;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Ccedil;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ccedil;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;eacute;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;eacute;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;É&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Eacute;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;egrave;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;egrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;È&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Egrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ecirc;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ecirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Ê&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ecirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;euml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;euml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Euml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Euml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;ï&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;iuml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Ï&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Iuml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ocirc;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ocirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Ô&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ocirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ouml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ouml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Ouml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ouml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;oslash;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;oslash;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Oslash;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Oslash;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;szlig;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;szlig;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ugrave;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ugrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Ù&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ugrave;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;ucirc;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;ucirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;Û&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Ucirc;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;uuml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;uuml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;&amp;amp;Uuml;&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;Uuml;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;®&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;reg;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;©&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;copy;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot;€&amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;euro;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      case &amp;quot; &amp;quot;:&lt;br /&gt;
        sb.append(&amp;quot;&amp;amp;nbsp;&amp;quot;);&lt;br /&gt;
        break;&lt;br /&gt;
      default:&lt;br /&gt;
        sb.append(c);&lt;br /&gt;
        break;&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
    return sb.toString();&lt;br /&gt;
  }&lt;br /&gt;
}&lt;br /&gt;
//&amp;amp;gt;&amp;amp;lt;&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  extends HTMLEditorKit.ParserCallback ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.InputStream;&lt;br /&gt;
import java.io.InputStreamReader;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.net.URL;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit;&lt;br /&gt;
import javax.swing.text.html.parser.ParserDelegator;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String args[]) throws Exception {&lt;br /&gt;
    URL url = new URL(args[0]);&lt;br /&gt;
    Reader reader = new InputStreamReader((InputStream) url.getContent());&lt;br /&gt;
    new ParserDelegator().parse(reader, new TextOnly(), false);&lt;br /&gt;
  }&lt;br /&gt;
}&lt;br /&gt;
class TextOnly extends HTMLEditorKit.ParserCallback {&lt;br /&gt;
  public void handleText(char[] data, int pos) {&lt;br /&gt;
    System.out.println(data);&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Extract links from an HTML page ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.FileReader;&lt;br /&gt;
import java.util.ArrayList;&lt;br /&gt;
import javax.swing.text.MutableAttributeSet;&lt;br /&gt;
import javax.swing.text.html.HTML.Attribute;&lt;br /&gt;
import javax.swing.text.html.HTML.Tag;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;&lt;br /&gt;
import javax.swing.text.html.parser.ParserDelegator;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public final static void main(String[] args) throws Exception {&lt;br /&gt;
    final ArrayList&amp;lt;String&amp;gt; list = new ArrayList&amp;lt;String&amp;gt;();&lt;br /&gt;
    ParserDelegator parserDelegator = new ParserDelegator();&lt;br /&gt;
    ParserCallback parserCallback = new ParserCallback() {&lt;br /&gt;
      public void handleText(final char[] data, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) {&lt;br /&gt;
        if (tag == Tag.A) {&lt;br /&gt;
          String address = (String) attribute.getAttribute(Attribute.HREF);&lt;br /&gt;
          list.add(address);&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      public void handleEndTag(Tag t, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleComment(final char[] data, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleError(final java.lang.String errMsg, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
    };&lt;br /&gt;
    parserDelegator.parse(new FileReader(&amp;quot;a.html&amp;quot;), parserCallback, false);&lt;br /&gt;
    System.out.println(list);&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Find and display hyperlinks contained within a web page ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.BufferedReader;&lt;br /&gt;
import java.io.FileReader;&lt;br /&gt;
import java.util.regex.Matcher;&lt;br /&gt;
import java.util.regex.Pattern;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String[] arguments)throws Exception {&lt;br /&gt;
    StringBuffer output = new StringBuffer();&lt;br /&gt;
    FileReader file = new FileReader(&amp;quot;a.htm&amp;quot;);&lt;br /&gt;
    BufferedReader buff = new BufferedReader(file);&lt;br /&gt;
    boolean eof = false;&lt;br /&gt;
    while (!eof) {&lt;br /&gt;
      String line = buff.readLine();&lt;br /&gt;
      if (line == null)&lt;br /&gt;
        eof = true;&lt;br /&gt;
      else&lt;br /&gt;
        output.append(line + &amp;quot;\n&amp;quot;);&lt;br /&gt;
    }&lt;br /&gt;
    buff.close();&lt;br /&gt;
    String page = output.toString();&lt;br /&gt;
    Pattern pattern = Pattern.compile(&amp;quot;&amp;lt;a.+href=\&amp;quot;(.+?)\&amp;quot;&amp;quot;);&lt;br /&gt;
    Matcher matcher = pattern.matcher(page);&lt;br /&gt;
    while (matcher.find()) {&lt;br /&gt;
      System.out.println(matcher.group(1));&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Get all hyper links from a web page ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.InputStream;&lt;br /&gt;
import java.io.InputStreamReader;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.net.URL;&lt;br /&gt;
import javax.swing.text.MutableAttributeSet;&lt;br /&gt;
import javax.swing.text.html.HTML;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit;&lt;br /&gt;
import javax.swing.text.html.parser.ParserDelegator;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String args[]) throws Exception {&lt;br /&gt;
    URL url = new URL(args[0]);&lt;br /&gt;
    Reader reader = new InputStreamReader((InputStream) url.getContent());&lt;br /&gt;
    System.out.println(&amp;quot;&amp;lt;HTML&amp;gt;&amp;lt;HEAD&amp;gt;&amp;lt;TITLE&amp;gt;Links for &amp;quot; + args[0] + &amp;quot;&amp;lt;/TITLE&amp;gt;&amp;quot;);&lt;br /&gt;
    System.out.println(&amp;quot;&amp;lt;BASE HREF=\&amp;quot;&amp;quot; + args[0] + &amp;quot;\&amp;quot;&amp;gt;&amp;lt;/HEAD&amp;gt;&amp;quot;);&lt;br /&gt;
    System.out.println(&amp;quot;&amp;lt;BODY&amp;gt;&amp;quot;);&lt;br /&gt;
    new ParserDelegator().parse(reader, new LinkPage(), false);&lt;br /&gt;
    System.out.println(&amp;quot;&amp;lt;/BODY&amp;gt;&amp;lt;/HTML&amp;gt;&amp;quot;);&lt;br /&gt;
  }&lt;br /&gt;
}&lt;br /&gt;
class LinkPage extends HTMLEditorKit.ParserCallback {&lt;br /&gt;
  public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {&lt;br /&gt;
    if (t == HTML.Tag.A) {&lt;br /&gt;
      System.out.println(&amp;quot;&amp;lt;BR&amp;gt;&amp;quot;);&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Getting the Links in an HTML Document ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.InputStreamReader;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.net.URI;&lt;br /&gt;
import java.net.URL;&lt;br /&gt;
import java.net.URLConnection;&lt;br /&gt;
import javax.swing.text.EditorKit;&lt;br /&gt;
import javax.swing.text.SimpleAttributeSet;&lt;br /&gt;
import javax.swing.text.html.HTML;&lt;br /&gt;
import javax.swing.text.html.HTMLDocument;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String[] argv) throws Exception {&lt;br /&gt;
    URL url = new URI(&amp;quot;http://www.google.ru&amp;quot;).toURL();&lt;br /&gt;
    URLConnection conn = url.openConnection();&lt;br /&gt;
    Reader rd = new InputStreamReader(conn.getInputStream());&lt;br /&gt;
    EditorKit kit = new HTMLEditorKit();&lt;br /&gt;
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();&lt;br /&gt;
    kit.read(rd, doc, 0);&lt;br /&gt;
    HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);&lt;br /&gt;
    while (it.isValid()) {&lt;br /&gt;
      SimpleAttributeSet s = (SimpleAttributeSet) it.getAttributes();&lt;br /&gt;
      String link = (String) s.getAttribute(HTML.Attribute.HREF);&lt;br /&gt;
      if (link != null) {&lt;br /&gt;
        System.out.println(link);&lt;br /&gt;
      }&lt;br /&gt;
      it.next();&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Getting the Text in an HTML Document ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.InputStreamReader;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.net.URI;&lt;br /&gt;
import java.net.URL;&lt;br /&gt;
import java.net.URLConnection;&lt;br /&gt;
import javax.swing.text.EditorKit;&lt;br /&gt;
import javax.swing.text.html.HTMLDocument;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String[] argv) throws Exception {&lt;br /&gt;
    HTMLDocument doc = new HTMLDocument() {&lt;br /&gt;
      public HTMLEditorKit.ParserCallback getReader(int pos) {&lt;br /&gt;
        return new HTMLEditorKit.ParserCallback() {&lt;br /&gt;
          public void handleText(char[] data, int pos) {&lt;br /&gt;
            System.out.println(data);&lt;br /&gt;
          }&lt;br /&gt;
        };&lt;br /&gt;
      }&lt;br /&gt;
    };&lt;br /&gt;
    URL url = new URI(&amp;quot;http://www.google.ru&amp;quot;).toURL();&lt;br /&gt;
    URLConnection conn = url.openConnection();&lt;br /&gt;
    Reader rd = new InputStreamReader(conn.getInputStream());&lt;br /&gt;
    EditorKit kit = new HTMLEditorKit();&lt;br /&gt;
    kit.read(rd, doc, 0);&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  HTML Parser ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
/*******************************************************************************&lt;br /&gt;
 * Copyright (c) 2004 Actuate Corporation.&lt;br /&gt;
 * All rights reserved. This program and the accompanying materials&lt;br /&gt;
 * are made available under the terms of the Eclipse Public License v1.0&lt;br /&gt;
 * which accompanies this distribution, and is available at&lt;br /&gt;
 * http://www.eclipse.org/legal/epl-v10.html&lt;br /&gt;
 *&lt;br /&gt;
 * Contributors:&lt;br /&gt;
 *  Actuate Corporation  - initial API and implementation&lt;br /&gt;
 *******************************************************************************/&lt;br /&gt;
&lt;br /&gt;
import java.io.FileNotFoundException;&lt;br /&gt;
import java.io.FileReader;&lt;br /&gt;
import java.io.IOException;&lt;br /&gt;
import java.io.LineNumberReader;&lt;br /&gt;
import java.util.ArrayList;&lt;br /&gt;
public class HTMLParser&lt;br /&gt;
{&lt;br /&gt;
  FileReader reader;&lt;br /&gt;
  LineNumberReader in;&lt;br /&gt;
  String token;&lt;br /&gt;
  ArrayList attribs = new ArrayList( );&lt;br /&gt;
  int pushC = -1;&lt;br /&gt;
  private boolean ignoreWhitespace = true;&lt;br /&gt;
  &lt;br /&gt;
  public static final int EOF = -1;&lt;br /&gt;
  public static final int TEXT = 1;&lt;br /&gt;
  public static final int DOCTYPE = 2;&lt;br /&gt;
  public static final int ELEMENT = 3;&lt;br /&gt;
  public static final int COMMENT = 4;&lt;br /&gt;
  public static final int SPECIAL_ELEMENT = 5;&lt;br /&gt;
  &lt;br /&gt;
  public static final int START_ELEMENT = 0;&lt;br /&gt;
  public static final int END_ELEMENT = 1;&lt;br /&gt;
  public static final int SINGLE_ELEMENT = 2;&lt;br /&gt;
  &lt;br /&gt;
  public HTMLParser( )&lt;br /&gt;
  {&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public void open( String fileName ) throws FileNotFoundException&lt;br /&gt;
  {&lt;br /&gt;
    reader = new FileReader( fileName );&lt;br /&gt;
    in = new LineNumberReader( reader );&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  /**&lt;br /&gt;
   * &lt;br /&gt;
   */&lt;br /&gt;
  public void close( )&lt;br /&gt;
  {&lt;br /&gt;
    try&lt;br /&gt;
    {&lt;br /&gt;
      in.close( );&lt;br /&gt;
      reader.close( );&lt;br /&gt;
    }&lt;br /&gt;
    catch ( IOException e1 )&lt;br /&gt;
    {&lt;br /&gt;
      // Ignore&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
  public String getTokenText( )&lt;br /&gt;
  {&lt;br /&gt;
    return token;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public int getElementType( )&lt;br /&gt;
  {&lt;br /&gt;
    if ( token.startsWith( &amp;quot;/&amp;quot; ) ) //$NON-NLS-1$&lt;br /&gt;
      return END_ELEMENT;&lt;br /&gt;
    if ( token.endsWith( &amp;quot;/&amp;quot; ) ) //$NON-NLS-1$&lt;br /&gt;
      return SINGLE_ELEMENT;&lt;br /&gt;
    return START_ELEMENT;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public String getElement( )&lt;br /&gt;
  {&lt;br /&gt;
    if ( token.startsWith( &amp;quot;/&amp;quot; ) ) //$NON-NLS-1$&lt;br /&gt;
      return token.substring( 1 );&lt;br /&gt;
    if ( token.endsWith( &amp;quot;/&amp;quot; ) ) //$NON-NLS-1$&lt;br /&gt;
      return token.substring( 0, token.length( ) - 1 );&lt;br /&gt;
    return token;&lt;br /&gt;
    &lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public ArrayList getAttribs( )&lt;br /&gt;
  {&lt;br /&gt;
    return attribs;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public String getAttrib( String name )&lt;br /&gt;
  {&lt;br /&gt;
    for ( int i = 0;  i &amp;lt; attribs.size( );  i++ )&lt;br /&gt;
    {&lt;br /&gt;
      AttribPair a = (AttribPair) attribs.get( i );&lt;br /&gt;
      if ( a.attrib.equalsIgnoreCase( name ) )&lt;br /&gt;
        return a.value;&lt;br /&gt;
    }&lt;br /&gt;
    return null;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  private int getC( )&lt;br /&gt;
  {&lt;br /&gt;
    if ( pushC != -1 )&lt;br /&gt;
    {&lt;br /&gt;
      int c = pushC;&lt;br /&gt;
      pushC = -1;&lt;br /&gt;
      return c;&lt;br /&gt;
    }&lt;br /&gt;
    try&lt;br /&gt;
    {&lt;br /&gt;
      return in.read( );&lt;br /&gt;
    }&lt;br /&gt;
    catch ( IOException e )&lt;br /&gt;
    {&lt;br /&gt;
      return EOF;&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  private void pushC( int c )&lt;br /&gt;
  {&lt;br /&gt;
    pushC = c;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public int getToken( )&lt;br /&gt;
  {&lt;br /&gt;
    for ( ; ; )&lt;br /&gt;
    {&lt;br /&gt;
      int c = getC( );&lt;br /&gt;
      switch ( c )&lt;br /&gt;
      {&lt;br /&gt;
        case -1:&lt;br /&gt;
          return EOF;&lt;br /&gt;
        case &amp;quot;&amp;lt;&amp;quot;:&lt;br /&gt;
          return getElement( c );&lt;br /&gt;
        default:&lt;br /&gt;
        {&lt;br /&gt;
          parseText( c );&lt;br /&gt;
          if ( ! ignoreWhitespace  ||  token.trim( ).length( ) &amp;gt; 0 )&lt;br /&gt;
            return TEXT;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
  private int parseText( int c )&lt;br /&gt;
  {&lt;br /&gt;
    StringBuffer text = new StringBuffer( );&lt;br /&gt;
    for ( ; ; )&lt;br /&gt;
    {&lt;br /&gt;
      if ( c == EOF )&lt;br /&gt;
        break;&lt;br /&gt;
      if ( c == &amp;quot;&amp;lt;&amp;quot; )&lt;br /&gt;
      {&lt;br /&gt;
        pushC( c );&lt;br /&gt;
        break;&lt;br /&gt;
      }&lt;br /&gt;
      &lt;br /&gt;
      // Convert MS-Word-style quotes.&lt;br /&gt;
      &lt;br /&gt;
      if ( c == 8220  ||  c == 8221 )&lt;br /&gt;
        text.append( &amp;quot;&amp;amp;quot;&amp;quot; );&lt;br /&gt;
      else&lt;br /&gt;
        text.append( (char) c );&lt;br /&gt;
      c = getC( );&lt;br /&gt;
    }&lt;br /&gt;
    token = text.toString( );&lt;br /&gt;
    return TEXT;&lt;br /&gt;
  }&lt;br /&gt;
  private int skipSpace( int c )&lt;br /&gt;
  {&lt;br /&gt;
    while ( c != EOF  &amp;amp;&amp;amp;  Character.isWhitespace( (char)c ) )&lt;br /&gt;
    {&lt;br /&gt;
      c = getC( );&lt;br /&gt;
    }&lt;br /&gt;
    return c;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  private int getElement( int c )&lt;br /&gt;
  {&lt;br /&gt;
    c = getC( );&lt;br /&gt;
    &lt;br /&gt;
    // Broken element&lt;br /&gt;
    &lt;br /&gt;
    if ( c == EOF )&lt;br /&gt;
      return EOF;&lt;br /&gt;
    &lt;br /&gt;
    if ( c == &amp;quot;!&amp;quot; )&lt;br /&gt;
      return getSpecialElement( );&lt;br /&gt;
    &lt;br /&gt;
    attribs.clear( );&lt;br /&gt;
    c = skipSpace( c );&lt;br /&gt;
    if ( c == EOF )&lt;br /&gt;
      return EOF;&lt;br /&gt;
    &lt;br /&gt;
    StringBuffer tag = new StringBuffer( );&lt;br /&gt;
    if ( c == &amp;quot;/&amp;quot; )&lt;br /&gt;
    {&lt;br /&gt;
      tag.append( (char) c );&lt;br /&gt;
      c = skipSpace( getC( ) );&lt;br /&gt;
      while ( c != EOF  &amp;amp;&amp;amp;  c != &amp;quot;&amp;gt;&amp;quot;  &amp;amp;&amp;amp; ! Character.isWhitespace( (char)c ) )&lt;br /&gt;
      {&lt;br /&gt;
        tag.append( (char) c );&lt;br /&gt;
        c = getC( );&lt;br /&gt;
      }&lt;br /&gt;
      token = tag.toString( );&lt;br /&gt;
      for ( ; ; )&lt;br /&gt;
      {&lt;br /&gt;
        if ( c == &amp;quot;&amp;gt;&amp;quot;  ||  c == -1 )&lt;br /&gt;
          break;&lt;br /&gt;
        c = getC( );&lt;br /&gt;
      }&lt;br /&gt;
      return ELEMENT;     &lt;br /&gt;
    }&lt;br /&gt;
    &lt;br /&gt;
    while ( c != EOF  &amp;amp;&amp;amp;  c != &amp;quot;&amp;gt;&amp;quot;  &amp;amp;&amp;amp;  c != &amp;quot;/&amp;quot;  &amp;amp;&amp;amp; ! Character.isWhitespace( (char)c ) )&lt;br /&gt;
    {&lt;br /&gt;
      tag.append( (char) c );&lt;br /&gt;
      c = getC( );&lt;br /&gt;
    }&lt;br /&gt;
    if ( c == EOF )&lt;br /&gt;
    {&lt;br /&gt;
      token = tag.toString( );&lt;br /&gt;
      return ELEMENT;&lt;br /&gt;
    }&lt;br /&gt;
    &lt;br /&gt;
    for ( ; ; )&lt;br /&gt;
    {&lt;br /&gt;
      c = skipSpace( c );&lt;br /&gt;
      if ( c == EOF  ||  c == &amp;quot;&amp;gt;&amp;quot; || c == &amp;quot;/&amp;quot; )&lt;br /&gt;
        break;&lt;br /&gt;
      c = getAttrib( c );&lt;br /&gt;
    }&lt;br /&gt;
    if ( c == &amp;quot;/&amp;quot; )&lt;br /&gt;
    {&lt;br /&gt;
      tag.append( (char) c );&lt;br /&gt;
      for ( ; ; )&lt;br /&gt;
      {&lt;br /&gt;
        c = getC( );&lt;br /&gt;
        if ( c == -1  ||  c == &amp;quot;&amp;gt;&amp;quot; )&lt;br /&gt;
          break;&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
    token = tag.toString( );&lt;br /&gt;
    return ELEMENT;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  private int getAttrib( int c )&lt;br /&gt;
  {&lt;br /&gt;
    AttribPair a = new AttribPair( );&lt;br /&gt;
    StringBuffer s = new StringBuffer( );&lt;br /&gt;
    while ( c != EOF  &amp;amp;&amp;amp;  c != &amp;quot;=&amp;quot;  &amp;amp;&amp;amp;  ! Character.isWhitespace( (char)c ) )&lt;br /&gt;
    {&lt;br /&gt;
      s.append( (char) c );&lt;br /&gt;
      c = getC( );&lt;br /&gt;
    }&lt;br /&gt;
    a.attrib = s.toString( );&lt;br /&gt;
    c = skipSpace( c );&lt;br /&gt;
    if ( c != &amp;quot;=&amp;quot; )&lt;br /&gt;
    {&lt;br /&gt;
      attribs.add( a );&lt;br /&gt;
      return c;&lt;br /&gt;
    }&lt;br /&gt;
    s = new StringBuffer( );&lt;br /&gt;
    c = skipSpace( getC( ) );&lt;br /&gt;
    if ( c == '\'' || c == '&amp;quot;' )&lt;br /&gt;
    {&lt;br /&gt;
      int quote = c;&lt;br /&gt;
      for ( ; ; )&lt;br /&gt;
      {&lt;br /&gt;
        c = getC( );&lt;br /&gt;
        if ( c == -1 )&lt;br /&gt;
          break;&lt;br /&gt;
        if ( c == quote )&lt;br /&gt;
        {&lt;br /&gt;
          c = getC( );&lt;br /&gt;
          break;&lt;br /&gt;
        }&lt;br /&gt;
        if ( c == &amp;quot;\\&amp;quot; )&lt;br /&gt;
        {&lt;br /&gt;
          c = getC( );&lt;br /&gt;
          if ( c == EOF )&lt;br /&gt;
            break;&lt;br /&gt;
          s.append( &amp;quot;\\&amp;quot; );&lt;br /&gt;
          s.append( (char) c );&lt;br /&gt;
        }&lt;br /&gt;
        else&lt;br /&gt;
        {&lt;br /&gt;
          s.append( (char) c );&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
    else&lt;br /&gt;
    {&lt;br /&gt;
      for ( ; ; )&lt;br /&gt;
      {&lt;br /&gt;
        c = getC( );&lt;br /&gt;
        if ( c == -1 )&lt;br /&gt;
          break;&lt;br /&gt;
        if ( c == &amp;quot;&amp;gt;&amp;quot;  ||  c == &amp;quot;/&amp;quot;  ||  Character.isWhitespace( (char)c ) )&lt;br /&gt;
        {&lt;br /&gt;
          c = getC( );&lt;br /&gt;
          break;&lt;br /&gt;
        }&lt;br /&gt;
        s.append( (char) c );&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
    a.value = s.toString( );&lt;br /&gt;
    attribs.add( a );&lt;br /&gt;
    return c;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  class AttribPair&lt;br /&gt;
  {&lt;br /&gt;
    String attrib;&lt;br /&gt;
    String value;&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  private int getSpecialElement(  )&lt;br /&gt;
  {&lt;br /&gt;
    StringBuffer text = new StringBuffer( );&lt;br /&gt;
    text.append( &amp;quot;&amp;lt;!&amp;quot; ); //$NON-NLS-1$&lt;br /&gt;
    for ( ; ; )&lt;br /&gt;
    {&lt;br /&gt;
      int c = getC( );&lt;br /&gt;
      if ( c == EOF || c == &amp;quot;&amp;gt;&amp;quot; )&lt;br /&gt;
        break;&lt;br /&gt;
      text.append( (char) c );&lt;br /&gt;
    }&lt;br /&gt;
    text.append( &amp;quot;&amp;gt;&amp;quot; );&lt;br /&gt;
    token = text.toString( );&lt;br /&gt;
    if ( token.startsWith( &amp;quot;&amp;lt;!--&amp;quot; ) ) //$NON-NLS-1$&lt;br /&gt;
      return COMMENT;&lt;br /&gt;
    return SPECIAL_ELEMENT;&lt;br /&gt;
  }&lt;br /&gt;
  static String formatTags[ ] =&lt;br /&gt;
  {&lt;br /&gt;
      &amp;quot;i&amp;quot;, &amp;quot;b&amp;quot;,  //$NON-NLS-1$//$NON-NLS-2$&lt;br /&gt;
      &amp;quot;strong&amp;quot;, &amp;quot;em&amp;quot;,  //$NON-NLS-1$//$NON-NLS-2$&lt;br /&gt;
      &amp;quot;code&amp;quot;, &amp;quot;span&amp;quot;, //$NON-NLS-1$ //$NON-NLS-2$&lt;br /&gt;
      &amp;quot;a&amp;quot; //$NON-NLS-1$&lt;br /&gt;
  };&lt;br /&gt;
  &lt;br /&gt;
  public boolean isFormatTag( )&lt;br /&gt;
  {&lt;br /&gt;
    return isFormatTag( getElement( ) );&lt;br /&gt;
  }&lt;br /&gt;
  &lt;br /&gt;
  public boolean isFormatTag( String tag )&lt;br /&gt;
  {&lt;br /&gt;
    for ( int i = 0;  i &amp;lt; formatTags.length;  i++ )&lt;br /&gt;
    {&lt;br /&gt;
      if ( formatTags[ i ].equalsIgnoreCase( tag ) )&lt;br /&gt;
        return true;&lt;br /&gt;
    }&lt;br /&gt;
    return false;&lt;br /&gt;
  }&lt;br /&gt;
  public Object getFullElement( )&lt;br /&gt;
  {&lt;br /&gt;
    StringBuffer text = new StringBuffer( );&lt;br /&gt;
    text.append( &amp;quot;&amp;lt;&amp;quot; );&lt;br /&gt;
    int elementType = getElementType( );&lt;br /&gt;
    if ( elementType == END_ELEMENT )&lt;br /&gt;
      text.append( &amp;quot;/&amp;quot; );&lt;br /&gt;
    text.append( getElement( ) );&lt;br /&gt;
    &lt;br /&gt;
    for ( int i = 0;  i &amp;lt; attribs.size( );  i++ )&lt;br /&gt;
    {&lt;br /&gt;
      text.append( &amp;quot; &amp;quot; );&lt;br /&gt;
      AttribPair a = (AttribPair) attribs.get( i );&lt;br /&gt;
      text.append( a.attrib );&lt;br /&gt;
      text.append( &amp;quot;=\&amp;quot;&amp;quot; ); //$NON-NLS-1$&lt;br /&gt;
      if ( a.value != null )&lt;br /&gt;
        text.append( a.value );&lt;br /&gt;
      text.append( &amp;quot;\&amp;quot;&amp;quot; ); //$NON-NLS-1$&lt;br /&gt;
    }&lt;br /&gt;
    if ( elementType == SINGLE_ELEMENT )&lt;br /&gt;
      text.append( &amp;quot;/&amp;quot; );&lt;br /&gt;
    text.append( &amp;quot;&amp;gt;&amp;quot; );&lt;br /&gt;
    return text.toString( );&lt;br /&gt;
  }&lt;br /&gt;
  public int getLineNo( )&lt;br /&gt;
  {&lt;br /&gt;
    return in.getLineNumber( );&lt;br /&gt;
  }&lt;br /&gt;
  public void ignoreWhitespace( boolean b )&lt;br /&gt;
  {&lt;br /&gt;
    ignoreWhitespace = b;&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  HTML parser based on HTMLEditorKit.ParserCallback ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.InputStream;&lt;br /&gt;
import java.io.InputStreamReader;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.net.URL;&lt;br /&gt;
import javax.swing.text.MutableAttributeSet;&lt;br /&gt;
import javax.swing.text.html.HTML;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit;&lt;br /&gt;
import javax.swing.text.html.parser.ParserDelegator;&lt;br /&gt;
/**&lt;br /&gt;
 * Downloads the document at the URL given as the first command-line argument&lt;br /&gt;
 * and runs it through the Swing HTML parser, reporting events to HTMLParse.&lt;br /&gt;
 */&lt;br /&gt;
public class Main {&lt;br /&gt;
  /**&lt;br /&gt;
   * @param args args[0] is the URL of the document to parse&lt;br /&gt;
   * @throws Exception if the URL is malformed or the document cannot be read or parsed&lt;br /&gt;
   */&lt;br /&gt;
  public static void main(String args[]) throws Exception {&lt;br /&gt;
    URL url = new URL(args[0]);&lt;br /&gt;
    // openStream() always yields an InputStream; getContent() may return another&lt;br /&gt;
    // type depending on the content handler, which made the old cast unsafe.&lt;br /&gt;
    InputStream stream = url.openStream();&lt;br /&gt;
    // NOTE: no charset is given, so bytes are decoded with the platform default.&lt;br /&gt;
    Reader reader = new InputStreamReader(stream);&lt;br /&gt;
    try {&lt;br /&gt;
      // false = do not ignore a charset declaration found inside the document.&lt;br /&gt;
      new ParserDelegator().parse(reader, new HTMLParse(), false);&lt;br /&gt;
    } finally {&lt;br /&gt;
      // Closing the reader also closes the network stream it wraps.&lt;br /&gt;
      reader.close();&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
}&lt;br /&gt;
class HTMLParse extends HTMLEditorKit.ParserCallback {&lt;br /&gt;
  public void handleText(char[] data, int pos) {&lt;br /&gt;
    System.out.println(data);&lt;br /&gt;
  }&lt;br /&gt;
  public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {&lt;br /&gt;
    System.out.println(&amp;quot;+&amp;quot; + t.toString());&lt;br /&gt;
  }&lt;br /&gt;
  public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {&lt;br /&gt;
    System.out.println(&amp;quot;*&amp;quot; + t.toString());&lt;br /&gt;
  }&lt;br /&gt;
  public void handleEndTag(HTML.Tag t, int pos) {&lt;br /&gt;
    System.out.println(&amp;quot;-&amp;quot; + t.toString());&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==  Using javax.swing.text.html.HTMLEditorKit to parse an HTML document ==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- start source code --&amp;gt;&lt;br /&gt;
   &lt;br /&gt;
    &amp;lt;source lang=&amp;quot;java&amp;quot;&amp;gt;&lt;br /&gt;
import java.io.FileReader;&lt;br /&gt;
import java.io.IOException;&lt;br /&gt;
import java.io.Reader;&lt;br /&gt;
import java.util.ArrayList;&lt;br /&gt;
import java.util.List;&lt;br /&gt;
import javax.swing.text.MutableAttributeSet;&lt;br /&gt;
import javax.swing.text.html.HTML.Tag;&lt;br /&gt;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;&lt;br /&gt;
import javax.swing.text.html.parser.ParserDelegator;&lt;br /&gt;
public class Main {&lt;br /&gt;
  public static void main(String[] args) throws Exception {&lt;br /&gt;
    final List&amp;lt;String&amp;gt; list = new ArrayList&amp;lt;String&amp;gt;();&lt;br /&gt;
    ParserDelegator parserDelegator = new ParserDelegator();&lt;br /&gt;
    ParserCallback parserCallback = new ParserCallback() {&lt;br /&gt;
      public void handleText(final char[] data, final int pos) {&lt;br /&gt;
        list.add(new String(data));&lt;br /&gt;
      }&lt;br /&gt;
      public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleEndTag(Tag t, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleComment(final char[] data, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
      public void handleError(final java.lang.String errMsg, final int pos) {&lt;br /&gt;
      }&lt;br /&gt;
    };&lt;br /&gt;
    parserDelegator.parse(new FileReader(&amp;quot;a.html&amp;quot;), parserCallback, true);&lt;br /&gt;
    System.out.println(list);&lt;br /&gt;
  }&lt;br /&gt;
}&amp;lt;/source&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
   &lt;br /&gt;
  &amp;lt;!-- end source code --&amp;gt;&lt;/div&gt;</summary>
			</entry>

	</feed>