import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

/**
 * Crawler worker thread: downloads one page, extracts every link that matches
 * the URL pattern, prints each link not seen before to standard error and
 * starts a new {@code FetchThread} for it one level deeper.
 *
 * The set of already-seen links is shared by all instances through a static,
 * synchronized set, so a link is reported and crawled at most once per JVM.
 */
class FetchThread extends Thread
{
	/** Pages whose depth exceeds this value are not fetched. */
	private static final int MAX_DEPTH = 3;

	/**
	 * Links already reported. A synchronized set is used so that
	 * "test and insert" is a single atomic {@code add} call; a separate
	 * contains()/add() pair would let two threads both accept the same link.
	 */
	private static final Set<String> history =
		Collections.synchronizedSet(new HashSet<String>());

	/**
	 * Links must end in "html", "htm" or "/". The longer suffix is tried
	 * first ("html?"); with "htm" listed first the regex engine would stop
	 * at "htm" and silently drop the final "l" of every ".html" link.
	 */
	private static final String url_regex = "http://[a-zA-Z0-9/%.~&=-]+(html?|/)";
	private static final Pattern pattern = Pattern.compile(url_regex);

	/** Address of the page this thread fetches. */
	private final String url;
	/** Depth of this page; the start page is created with depth 1. */
	private final int depth;

	/**
	 * Fetches the page and spawns one thread per newly discovered link.
	 * Does nothing when the depth exceeds {@code MAX_DEPTH}. Pages that
	 * cannot be fetched are skipped silently.
	 */
	@Override
	public void run()
	{
		// Stop once the depth exceeds the limit.
		if (depth > MAX_DEPTH) {
			return;
		}

		try {
			String content = readPage(url);

			// Process every match of the pattern.
			Matcher matcher = pattern.matcher(content);
			while (matcher.find()) {
				String newurl = matcher.group();

				// A link that was already visited must not be visited again;
				// add() returns false when the link is already recorded.
				if (!history.add(newurl)) {
					continue;
				}

				// Print the depth and the link, then crawl it one level deeper.
				System.err.println("depth: " + depth + ", url: " + newurl);
				new FetchThread(newurl, depth + 1).start();
			}
		} catch (IOException ignored) {
			// Best effort: a malformed address (MalformedURLException is an
			// IOException) or an unreachable page is skipped. Nothing is
			// logged because the crawl results themselves go to System.err.
		}
	}

	/**
	 * Reads the whole page at {@code address} into a string.
	 *
	 * Every line is terminated with '\n' so that a link ending one line
	 * cannot merge with the text that starts the next line.
	 *
	 * @param address the URL to download
	 * @return the page text, one '\n' after each line
	 * @throws IOException if the address is malformed or reading fails
	 */
	private static String readPage(String address) throws IOException
	{
		BufferedReader in = new BufferedReader(
			new InputStreamReader(new URL(address).openStream()));
		try {
			StringBuilder content = new StringBuilder();
			String line;
			while ((line = in.readLine()) != null) {
				content.append(line).append('\n');
			}
			return content.toString();
		} finally {
			// Close the stream even when reading fails part-way.
			in.close();
		}
	}

	/**
	 * @param u address of the page to fetch
	 * @param d depth of that page
	 */
	FetchThread(String u, int d)
	{
		url = u;
		depth = d;
	}
}

/**
 * Command-line entry point: starts a {@code FetchThread} at depth 1 for the
 * URL given as the first argument.
 */
public class SpiderFetch
{
	/**
	 * @param args {@code args[0]} is the start URL; when it is missing a
	 *             usage message is printed and the program exits with status 1
	 */
	public static void main(String[] args)
	{
		// Without this check a missing argument would surface as an
		// ArrayIndexOutOfBoundsException stack trace.
		if (args.length < 1) {
			System.err.println("usage: java SpiderFetch <start-url>");
			System.exit(1);
		}

		new FetchThread(args[0], 1).start();
	}
}

