import java.io.*; import java.util.*; import java.net.*; public class WebSurveyor { // Initialize a WebSurveyor object capable of giving a set of WebPages reachable // from startingUrl all on the site defined by siteString public WebSurveyor(String startingUrl, String siteString) throws IOException { wpList = new Set(); urlList = new Set(); if (startingUrl.indexOf(siteString) == -1) return; WebPage wp = new WebPage(startingUrl); wpList.addElement(wp); urlList.addElement(startingUrl); survey(wp,siteString); } // Examine the web pages of the site defined by siteString and reachable from the // web page wp private void survey(WebPage wp, String siteString) throws IOException { Set links = wp.getLinks(); Enumeration e = links.elements(); while (e.hasMoreElements()) { String url = (String) e.nextElement(); if (url.indexOf(siteString)!=-1 && !urlList.contains(url)) { WebPage wp2 = new WebPage(url); urlList.addElement(url); if (!wp2.isBad()) { wpList.addElement(wp2); survey(wp2,siteString); } } } } Set getPages() { return wpList; } private Set wpList;// set of existing web pages whose URLs contain // the siteString and that are reachable from // the initial URL given to WebSurveyor private Set urlList;// set of URLs good or bad that have already // been surveyed private static void showBad(WebPage wp, Set badset) { System.out.println("Bad pages of "+wp.getURL()+":"); Enumeration ebad = badset.elements(); while (ebad.hasMoreElements()) System.out.println(ebad.nextElement()); } public static void main(String[] args) throws IOException { String startingUrl = args[0]; String siteString = args[1]; WebSurveyor ws = new WebSurveyor(startingUrl,siteString); Set pset = ws.getPages(); Enumeration e = pset.elements(); while (e.hasMoreElements()) { WebPage wp = (WebPage) e.nextElement(); Set badset = wp.getBadLinks(); if (!badset.isEmpty()) showBad(wp,badset); } } }