/* * SiteTool.java * * Created on 2005Äê4ÔÂ28ÈÕ, ÏÂÎç1:18 */ /** * * @author Administrator */ import java.awt.*; import java.awt.event.*; import javax.swing.*; import java.awt.*; //for basic events import java.io.*; //for io exception import java.net.*; //for URLS import java.util.*; //for hash import javax.swing.JOptionPane; //for joptionpane import javax.swing.event.*; //for hypertextlistener import javax.swing.text.*;// for bad location exception import javax.swing.text.html.*; //for html editor kit import javax.swing.DefaultListModel; // model for the jList import javax.swing.ImageIcon; //for image icon import java.io.File; import javax.swing.*; import javax.swing.filechooser.*; public class SiteTool extends javax.swing.JFrame { /** Creates new form SiteTool */ public SiteTool() { hrefModel = new DefaultListModel(); //default values MaxPages=30; IncludeImage=0; InsideSite=1; initComponents(); urlHs = new HashSet(); urlList=new ArrayList(); urlHt=new Hashtable(); resultList=new ArrayList(); this.jScrollPaneLeft.getViewport().add(UrlList, null); //link urlList to hrefModel //UrlList.setModel(hrefModel); //UrlList.setValueIsAdjusting(false); } //urlHs is to garauntee that no repeat url will be saved to urlList private HashSet urlHs; private ArrayList urlList; //key is each url, value is urlDetail class; private Hashtable urlHt; //sorted list of urlDetail; private ArrayList resultList; //Spider will go through all url in this hrefModel //to fetch new url and then save them to hrefModel. 
//The loop is located in getPages() private DefaultListModel hrefModel; private int PageIndex, MaxPages; private int IncludeImage, InsideSite; private String seedURL, seedBase, currentURL; public int state; public int size; public long lastMod = 0; public String title; private int displayType=1;//0:explorer; 1: summary; 2: source private int sortbyType=0;//0-4: InLink, OutLink, ErrorLink, Last Modified, State /** because string in the list is not only url, also in #, out#, err# * we need to process list string to get url * the idea substring before the first " " is url; */ String getUrlByListStr(String listStr){ int pos1=listStr.indexOf(""); int pos2=listStr.indexOf(" state:"); // System.out.println("pos1= "+pos1+" pos2= "+pos2); if (pos2 <=0 || pos1<0){ return " "; } else{ return listStr.substring(pos1+6,pos2); } } private boolean isValidUrl(String url){ if(url.startsWith("http://")) return true; else return false; } private String getSeedBase(String url){ int pos=url.indexOf("/", 7); if(pos<=0) return url; else return url.substring(0,pos); } //Spider start to run! 
void getPages() { seedURL=UrlTextField.getText(); if(!isValidUrl(seedURL)){ statusDisplay("Seed URL is not valid!"); return; } seedBase=getSeedBase(seedURL); PageIndex=0; hrefModel.clear(); urlHs.clear(); urlList.clear(); urlHt.clear(); urlHs.add(seedURL); urlList.add(seedURL); urlHt.put(new String(seedURL), new urlDetail(seedURL)); while (PageIndex0) { p1 = myText.indexOf('>',p1); p2 = myText.indexOf('<',p1); if (p2>0) title = myText.substring(p1+1,p2).trim(); } int m1,m2,m3; m1 = myText.indexOf("0){ m3 = myText.indexOf('>',m1); if(m3>0) meta=myText.substring(m1+5,m3-1); } p1 = myText.indexOf("keywords"); while (p1>0) { m1 = myText.lastIndexOf("',p1); if (m3<0||m2>m3) break; meta=myText.substring(p1+m1,p1+m3); p1 = myText.indexOf('\"',m2); if (p1<0) p1 = myText.indexOf('\'',m2); if (p1<0) p1 = myText.indexOf('=',m2); p2 = myText.indexOf('\"',p1+1); if (p2<0) p2 = myText.indexOf('\'',p1+1); if (p1>0&&p2>0) keywords = myText.substring(p1+1,p2).trim(); } //save //if(urlHt.containsKey(currentURL)){ try{ ((urlDetail)urlHt.get(currentURL)).updateHead(title,keywords,meta); } catch(NullPointerException e){ System.out.println("currentURL not exist in urlHt:"+currentURL); } } boolean isImage(String url){ //image //in future, should modify as .jpg appear in the end of url. if(url.indexOf(".jpg")>=0||url.indexOf(".gif")>=0||url.indexOf(".JPG")>=0||url.indexOf(".GIF")>=0){ return true; } else{ return false; } } private void myGetLinks(String SURL,String contentStr) { //? why set cursor? I forgot. 
setCursor( Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR) ); javax.swing.text.html.HTMLEditorKit htmlEditorKit = new javax.swing.text.html.HTMLEditorKit(); HTMLDocument htmlDoc = new HTMLDocument(); Reader htmlReader = null; String sbase, newURL; try{ //URL myurl = new URL(SURL); //htmlReader = new InputStreamReader(myurl.openConnection().getInputStream()); //htmlReader = new InputStreamReader(connect.getInputStream()); htmlReader = new StringReader(contentStr); htmlEditorKit.read(htmlReader, htmlDoc, 0); } catch (ChangedCharSetException e){ //fetchURL() is to readin the content of a HTML //String urlString = fetchURL(SURL); //System.out.println("changed char set: " + e.getCharSetSpec()); int pos = contentStr.toLowerCase().indexOf("charset") ; String htmlSource = contentStr.substring(pos+11); htmlReader = new StringReader(htmlSource); try {htmlEditorKit.read(htmlReader, htmlDoc,0); } catch (IOException ie){ System.err.println("iread"+ie); } catch (BadLocationException ie){ System.err.println("iloc"+ie); } } catch (BadLocationException e){ System.err.println("bloc"+e); } catch (MalformedURLException murle){ System.err.println("MURL"+murle); } catch (IOException e){ System.err.println("IOE"+e); } catch (Exception ee){ System.err.println("Other Exception"+ee); return; } URL base=htmlDoc.getBase();//base = "http://......./.../" if (base==null){ sbase = fabricatebase(SURL);//create a sbase when it is null } else{ sbase=base.toString(); } //add source when it is in frame or img ElementIterator it = new ElementIterator(htmlDoc);//tag iterator javax.swing.text.Element elem; while ((elem = it.next()) != null){ if(elem.getName().equalsIgnoreCase("frame")){ String href; if ((href = (String)elem.getAttributes().getAttribute (HTML.Attribute.SRC)) != null){ gotURL(sbase, href); } } if (IncludeImage==1){ if (elem.getName().equalsIgnoreCase("img")){ String href; if ((href = (String)elem.getAttributes().getAttribute (HTML.Attribute.SRC)) != null){ gotURL(sbase, href); } } }//end 
IncludeImage=1 }//end while //add source when it is in HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A); while(iterator.isValid()){ String href = (String)iterator.getAttributes().getAttribute(HTML.Attribute.HREF); if (href!=null){ gotURL(sbase, href); } iterator.next(); }//end while setCursor( Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR) ); } private void gotURL(String sbase, String href){ String absurl = normalizeURL(sbase, href); if(absurl==null) return; int i; if (absurl.indexOf(".pdf")>=0 || absurl.indexOf(".ps")>=0 || absurl.indexOf(".xml")>=0 || absurl.indexOf("mailto")>=0 || absurl.indexOf(".htm")<=0 || absurl.indexOf(".html")<=0 ){ modifyFromTo(currentURL, absurl,1);//only add to, not from return; } if (InsideSite == 1){//inside site //String seedURL=textField.getText(); if (absurl.startsWith(this.seedBase) && !urlHs.contains(absurl)){ urlHs.add(absurl);//add absurl to urlHs urlList.add(absurl); // System.out.println("url added: "+absurl); modifyFromTo(currentURL, absurl, 0);//add from &to //model.addElement(absurl);//add absurl to urlHs, a DefaultListModel } else{//outsite //don't add new urlDetail for new url. 
modifyFromTo(currentURL, absurl,1);//only add to, not from } } else{//not only inside site, it may be outside site if (!urlHs.contains(absurl)){ urlHs.add(absurl); urlList.add(absurl); // System.out.println("url added: "+absurl); modifyFromTo(currentURL, absurl,0); //model.addElement(absurl); } } } //normalize URL and add URL to urlHs, and urlList //normalizeURL(sbase, href) private String normalizeURL(String base, String in) { int i; String absurl,s,front1, front2,back,temp; if ( urlList.size()>=MaxPages) return null; s=in; if ((i = in.indexOf('#'))>=0)//fragement s = in.substring(0,i); if ((i = in.indexOf('?'))>=0)//query s = in.substring(0,i); if (in.indexOf('+')>=0 || in.indexOf('%')>=0){//decode try{ in = URLDecoder.decode(in); }catch(Exception e) {} } if ( IncludeImage==0){ if ((i = in.indexOf(".jpg"))>=0 || (i = in.indexOf(".JPG"))>=0) return null; if ((i = in.indexOf(".gif"))>=0 || (i = in.indexOf(".GIF"))>=0) return null; } if (s.startsWith("http://")) absurl=s.trim(); else{ if (s.indexOf("://")>0) return null; //file,ftp,gopher etc. absurl=base.trim()+s.trim(); } while ((i = absurl.indexOf("/../"))>0){//relative directory, parent level front1 = absurl.substring(0,i); back = absurl.substring(i+3); i = front1.lastIndexOf('/'); front2 = front1.substring(0,i); absurl = front2 + back; } while ((i = absurl.indexOf("/./"))>0){//relative directory, self level front1 = absurl.substring(0,i); back = absurl.substring(i+2); absurl = front1 + back; } //replace space with %20 while ((i = absurl.indexOf(" "))>0){ front1 = absurl.substring(0,i); back = absurl.substring(i+1); absurl = front1 + "%20"+back; } return absurl; } //read the content of a HTML private static String fetchURL (String urlString) { // this routine works around character set changes -- not clear how it does it. 
StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); try{ URL url = new URL (urlString); java.io.InputStream content = (InputStream)url.getContent(); BufferedReader in = new BufferedReader (new InputStreamReader (content)); String line; while ((line = in.readLine()) != null) { pw.println (line); } in.close(); pw.close(); }catch (MalformedURLException e) {pw.println ("Invalid URL"); } catch (IOException e) { pw.println ("Error reading URL"); } catch(ClassCastException cce){ pw.println ("ClassCastException: "+urlString); } return sw.toString(); }// end of fetchURL private String fabricatebase(String s) { String sbase; int i = s.lastIndexOf('/'); if (i<7) sbase=s+'/'; else sbase=s.substring(0,i+1); return sbase; } private void modifyFromTo(String fromURL, String toURL,int type){ urlDetail temp;// = new urlDetail(); //update from boolean contained=urlHt.containsKey(fromURL); if (contained){ temp = (urlDetail) urlHt.get(fromURL); } else{ temp=new urlDetail(fromURL); } temp.addTo(toURL); //temp.toHs.add(toURL); //temp.toSize = temp.toHs.size(); if(!contained){ urlHt.put(fromURL, temp); } if(type==0){ //update to contained=urlHt.containsKey(toURL); if (contained){ temp = (urlDetail) urlHt.get(toURL); } else{ temp=new urlDetail(toURL); } temp.addFrom(fromURL); //temp.fromHs.add(fromURL); //temp.fromSize = temp.fromHs.size(); if(!contained){ urlHt.put(toURL, temp); } } } private URLConnection updateurlDetail(String url){ URLConnection con; URL myUrl; //html try{ myUrl = new URL(url); con = myUrl.openConnection(); state = 200; lastMod = con.getLastModified() ; size = con.getContentLength(); // if (lastMod==0) { // System.out.println(url+" lastMod=0"); // state = 404; // size = 0; // } if (con instanceof HttpURLConnection) { HttpURLConnection h = (HttpURLConnection) con; // System.out.println(" Request Method: " + h.getRequestMethod()); // System.out.println(" Response Message: " + h.getResponseMessage()); // System.out.println(" Response Code: " + 
h.getResponseCode()); state=h.getResponseCode(); } writeurlState(url,state, size, lastMod); if (state!=200) { return null; } else{ return con; } } catch (MalformedURLException e) { state = -1; System.err.println("Invalid URL "+e+" "+url); writeurlState(url,state, size, lastMod); return null; } catch (FileNotFoundException e) { state = 404; System.err.println("Error reading URL 404 "+e+" "+url); writeurlState(url,state, size, lastMod); return null; } catch (IOException e) { int p1 = e.toString().indexOf("response code: "); try { if (p1>0){ state = Integer.parseInt(e.toString().substring(p1+15,p1+18)); } else state = -1; } catch(Exception ee){ state = -1; } System.err.println("Error reading URL "+e+" "+url); writeurlState(url,state, size, lastMod); return null; } catch (Exception e) { state = -1; System.err.println("Error "+e+" "+url); writeurlState(url,state, size, lastMod); return null; } catch (Throwable e) { state = -1; System.err.println("Error "+e+" "+url); writeurlState(url,state, size, lastMod); return null; } } public void writeurlState(String theUrl, int st, int sz, long lastmod){ //theUrl should exist in urlHt; try { ((urlDetail)urlHt.get(theUrl)).updateState(st,sz,lastmod); }catch(NullPointerException e){ System.out.println("currentURL not exist in urlHt when writeurlState:"+theUrl); } } /** This method is called from within the constructor to * initialize the form. * WARNING: Do NOT modify this code. The content of this method is * always regenerated by the Form Editor. 
*/ // //GEN-BEGIN:initComponents private void initComponents() { DisplayButtonGroup = new javax.swing.ButtonGroup(); reportFileChooser = new javax.swing.JFileChooser(); TopPanel = new javax.swing.JPanel(); UrlPanel = new javax.swing.JPanel(); UrlLabel = new javax.swing.JLabel(); UrlTextField = new javax.swing.JTextField(); jSeparator3 = new javax.swing.JSeparator(); jSeparator5 = new javax.swing.JSeparator(); SiteCheckBox = new javax.swing.JCheckBox(); jSeparator4 = new javax.swing.JSeparator(); MaxPagesLabel = new javax.swing.JLabel(); MaxPagesTextField = new javax.swing.JTextField(); StartButton = new javax.swing.JButton(); SortPanel = new javax.swing.JPanel(); SortbyLabel = new javax.swing.JLabel(); SortComboBox = new javax.swing.JComboBox(); jSeparator1 = new javax.swing.JSeparator(); ExplorerRadioButton = new javax.swing.JRadioButton(); SummaryRadioButton = new javax.swing.JRadioButton(); SourceRadioButton = new javax.swing.JRadioButton(); jSeparator2 = new javax.swing.JSeparator(); SaveReportButton = new javax.swing.JButton(); BottomPanel = new javax.swing.JPanel(); SplitPane = new javax.swing.JSplitPane(); jScrollPaneLeft = new javax.swing.JScrollPane(); UrlList = new javax.swing.JList(); jScrollPaneRight = new javax.swing.JScrollPane(); DisplayPanel = new javax.swing.JTextPane(); ToolsPanel = new javax.swing.JPanel(); ToolsLabel = new javax.swing.JLabel(); TitleButton = new javax.swing.JButton(); KeywordButton = new javax.swing.JButton(); SaveButton = new javax.swing.JButton(); StatusPanel = new javax.swing.JPanel(); StatusLabel = new javax.swing.JLabel(); getContentPane().setLayout(new java.awt.BorderLayout(0, 10)); setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); setTitle("SiteTool--Yefei Peng"); TopPanel.setLayout(new java.awt.GridLayout(2, 1, 0, 2)); TopPanel.setBorder(new javax.swing.border.EtchedBorder()); TopPanel.setMinimumSize(new java.awt.Dimension(400, 20)); TopPanel.setPreferredSize(new java.awt.Dimension(800, 60)); 
UrlPanel.setLayout(new javax.swing.BoxLayout(UrlPanel, javax.swing.BoxLayout.X_AXIS)); UrlLabel.setText("Seed URL"); UrlPanel.add(UrlLabel); UrlTextField.setText("http://www.sis.pitt.edu/~spring/"); UrlTextField.setPreferredSize(new java.awt.Dimension(150, 23)); UrlTextField.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { UrlTextFieldActionPerformed(evt); } }); UrlPanel.add(UrlTextField); jSeparator3.setOrientation(javax.swing.SwingConstants.VERTICAL); UrlPanel.add(jSeparator3); jSeparator5.setOrientation(javax.swing.SwingConstants.VERTICAL); UrlPanel.add(jSeparator5); SiteCheckBox.setSelected(true); SiteCheckBox.setText("Inside Site"); SiteCheckBox.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { SiteCheckBoxActionPerformed(evt); } }); UrlPanel.add(SiteCheckBox); jSeparator4.setOrientation(javax.swing.SwingConstants.VERTICAL); UrlPanel.add(jSeparator4); MaxPagesLabel.setText("Max Pages"); UrlPanel.add(MaxPagesLabel); MaxPagesTextField.setText(Integer.toString(MaxPages)); MaxPagesTextField.setMaximumSize(new java.awt.Dimension(50, 30)); MaxPagesTextField.setPreferredSize(new java.awt.Dimension(50, 23)); UrlPanel.add(MaxPagesTextField); StartButton.setText("Start"); StartButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { StartButtonActionPerformed(evt); } }); UrlPanel.add(StartButton); TopPanel.add(UrlPanel); SortPanel.setLayout(new javax.swing.BoxLayout(SortPanel, javax.swing.BoxLayout.X_AXIS)); SortbyLabel.setText("Sort By "); SortPanel.add(SortbyLabel); SortbyLabel.getAccessibleContext().setAccessibleName("Sort By "); SortComboBox.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "InLink", "OutLink", "ErrorLink", "Last Modified", "State", "Size" })); SortComboBox.addActionListener(new java.awt.event.ActionListener() { public void 
actionPerformed(java.awt.event.ActionEvent evt) { SortComboBoxActionPerformed(evt); } }); SortPanel.add(SortComboBox); jSeparator1.setOrientation(javax.swing.SwingConstants.VERTICAL); SortPanel.add(jSeparator1); DisplayButtonGroup.add(ExplorerRadioButton); ExplorerRadioButton.setText("Explorer"); ExplorerRadioButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { ExplorerRadioButtonActionPerformed(evt); } }); SortPanel.add(ExplorerRadioButton); DisplayButtonGroup.add(SummaryRadioButton); SummaryRadioButton.setSelected(true); SummaryRadioButton.setText("Summary"); SummaryRadioButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { SummaryRadioButtonActionPerformed(evt); } }); SortPanel.add(SummaryRadioButton); DisplayButtonGroup.add(SourceRadioButton); SourceRadioButton.setText("Source"); SourceRadioButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { SourceRadioButtonActionPerformed(evt); } }); SortPanel.add(SourceRadioButton); jSeparator2.setOrientation(javax.swing.SwingConstants.VERTICAL); SortPanel.add(jSeparator2); SaveReportButton.setText("Save Report"); SaveReportButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { SaveReportButtonActionPerformed(evt); } }); SortPanel.add(SaveReportButton); TopPanel.add(SortPanel); getContentPane().add(TopPanel, java.awt.BorderLayout.NORTH); BottomPanel.setLayout(new java.awt.BorderLayout()); BottomPanel.setBorder(new javax.swing.border.EtchedBorder()); jScrollPaneLeft.setMinimumSize(new java.awt.Dimension(0, 200)); jScrollPaneLeft.setPreferredSize(new java.awt.Dimension(400, 800)); UrlList.setModel(hrefModel); UrlList.setMaximumSize(new java.awt.Dimension(1000, 2000)); UrlList.setMinimumSize(new java.awt.Dimension(200, 400)); 
UrlList.setPreferredSize(new java.awt.Dimension(400, 600)); UrlList.addListSelectionListener(new javax.swing.event.ListSelectionListener() { public void valueChanged(javax.swing.event.ListSelectionEvent evt) { UrlListValueChanged(evt); } }); UrlList.addMouseListener(new java.awt.event.MouseAdapter() { public void mouseClicked(java.awt.event.MouseEvent evt) { UrlListMouseClicked(evt); } }); jScrollPaneLeft.setViewportView(UrlList); SplitPane.setLeftComponent(jScrollPaneLeft); jScrollPaneRight.setMinimumSize(new java.awt.Dimension(0, 200)); DisplayPanel.setEditable(false); DisplayPanel.setMaximumSize(new java.awt.Dimension(1000, 2000)); DisplayPanel.setMinimumSize(new java.awt.Dimension(0, 200)); DisplayPanel.setPreferredSize(new java.awt.Dimension(400, 800)); jScrollPaneRight.setViewportView(DisplayPanel); SplitPane.setRightComponent(jScrollPaneRight); BottomPanel.add(SplitPane, java.awt.BorderLayout.CENTER); ToolsPanel.setLayout(new javax.swing.BoxLayout(ToolsPanel, javax.swing.BoxLayout.Y_AXIS)); ToolsPanel.setBorder(new javax.swing.border.EtchedBorder()); ToolsPanel.setMinimumSize(new java.awt.Dimension(20, 200)); ToolsPanel.setPreferredSize(new java.awt.Dimension(90, 600)); ToolsLabel.setText("Editor Tools"); ToolsPanel.add(ToolsLabel); TitleButton.setLabel("
Change
Title
"); TitleButton.setMaximumSize(new java.awt.Dimension(100, 40)); TitleButton.setMinimumSize(new java.awt.Dimension(80, 40)); TitleButton.setPreferredSize(new java.awt.Dimension(80, 40)); TitleButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { TitleButtonActionPerformed(evt); } }); ToolsPanel.add(TitleButton); KeywordButton.setText("
Change
Keyword
"); KeywordButton.setMaximumSize(new java.awt.Dimension(100, 40)); KeywordButton.setMinimumSize(new java.awt.Dimension(100, 40)); KeywordButton.setPreferredSize(new java.awt.Dimension(80, 40)); KeywordButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { KeywordButtonActionPerformed(evt); } }); ToolsPanel.add(KeywordButton); SaveButton.setText("Save"); SaveButton.setMaximumSize(new java.awt.Dimension(100, 27)); SaveButton.setMinimumSize(new java.awt.Dimension(80, 27)); SaveButton.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { SaveButtonActionPerformed(evt); } }); ToolsPanel.add(SaveButton); BottomPanel.add(ToolsPanel, java.awt.BorderLayout.EAST); getContentPane().add(BottomPanel, java.awt.BorderLayout.CENTER); StatusPanel.setLayout(new javax.swing.BoxLayout(StatusPanel, javax.swing.BoxLayout.X_AXIS)); StatusPanel.setBorder(new javax.swing.border.EtchedBorder()); StatusLabel.setText("Status:"); StatusPanel.add(StatusLabel); getContentPane().add(StatusPanel, java.awt.BorderLayout.SOUTH); pack(); } //
//GEN-END:initComponents private void UrlTextFieldActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_UrlTextFieldActionPerformed // TODO add your handling code here: StartButtonActionPerformed(evt); }//GEN-LAST:event_UrlTextFieldActionPerformed private void UrlListValueChanged(javax.swing.event.ListSelectionEvent evt) {//GEN-FIRST:event_UrlListValueChanged // TODO add your handling code here: //if the list is empty, do nothing. if(hrefModel.size()==0) return; String s; setCursor( Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR) ); s = (String)UrlList.getSelectedValue(); currentURL = getUrlByListStr(s); updateDisplay(currentURL); setCursor( Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR) ); }//GEN-LAST:event_UrlListValueChanged private void SiteCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SiteCheckBoxActionPerformed // TODO add your handling code here: if(InsideSite==1) InsideSite=0; else InsideSite=1; }//GEN-LAST:event_SiteCheckBoxActionPerformed private void SaveButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SaveButtonActionPerformed // TODO add your handling code here: String s; if(DisplayPanel.getText().equals("") || currentURL.equals("")){ this.statusDisplay("Choose a page first, please."); return; } if((( urlDetail)urlHt.get(currentURL)).isBadLink()){ this.statusDisplay("Bad Link. 
Choose a good page, please."); return; } if (this.displayType!=2){//fetch URL and save s=fetchURL(currentURL); } else{ s=DisplayPanel.getText(); } if(s==null){ statusDisplay("Content is empty."); return; } reportFileChooser.addChoosableFileFilter(new HtmlFilter()); int returnVal = reportFileChooser.showSaveDialog(SiteTool.this); if (returnVal == JFileChooser.APPROVE_OPTION) { File file = reportFileChooser.getSelectedFile(); try { PrintWriter dataFile = new PrintWriter(new FileWriter(file)); dataFile.println(s); dataFile.close(); } catch(Exception ee) { ee.printStackTrace(); } } }//GEN-LAST:event_SaveButtonActionPerformed private void KeywordButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_KeywordButtonActionPerformed // TODO add your handling code here: if(DisplayPanel.getText().equals("") || currentURL.equals("")){ this.statusDisplay("Choose a page first, please."); return; } if((( urlDetail)urlHt.get(currentURL)).isBadLink()){ this.statusDisplay("Bad Link. Choose a good page, please."); return; } String content; //set source mode; if(this.displayType!=2){ this.displayType=2; SourceRadioButton.setSelected(true); StyledEditorKit sek = new StyledEditorKit(); //DefaultEditorKit sek= new DefaultEditorKit(); DisplayPanel.setEditorKit(sek); DisplayPanel.setContentType("text/plain"); content=fetchURL(currentURL); } else{ content=DisplayPanel.getText(); } if (content==null) return; DisplayPanel.setText(""); String oldkeywords=(( urlDetail)urlHt.get(currentURL)).getKeywords(); String newkeywords = (String)JOptionPane.showInputDialog(null, "Old Keywords: "+oldkeywords+"\nType New Keywords Here: ", "Change Keywords",JOptionPane.INFORMATION_MESSAGE,null,null,""); if(newkeywords==null) return; newkeywords=newkeywords.trim(); // String content=fetchURL(currentURL); String lt = content.toLowerCase(); StringBuffer sBuf = new StringBuffer(); String aux=null; int startpos=0, endpos=0; if (!oldkeywords.equals("No Key Words")) { int pos = myFind(lt,"meta"); 
pos = content.indexOf(oldkeywords,pos); sBuf.append(content.substring(0,pos)); sBuf.append(" "); sBuf.append(newkeywords); sBuf.append(content.substring(pos+oldkeywords.length()+1)); } else { int p=-1,pos=0; while (true) { pos = myFind(lt,"meta"); if (pos>0) {p=lt.lastIndexOf('<',pos);break;} pos = myFind(lt,"head"); if (pos>0) {p=lt.indexOf('>',pos)+1;break;} aux="head"; // add head tag pos = myFind(lt,"html"); if (pos>0) {p=lt.indexOf('>',pos)+1;break;} p=0; } sBuf.append(content.substring(0,p)); if (aux!=null) sBuf.append("<"+aux+">\n"); sBuf.append("\n"); if (aux!=null) sBuf.append("\n"); sBuf.append(content.substring(p)); } DisplayPanel.setText(sBuf.toString()); Document displayDoc =DisplayPanel.getDocument(); String docStr=new String(); try { docStr=displayDoc.getText(0,displayDoc.getLength()); }catch(Exception e){ } if (!oldkeywords.equals("No Key Words")) { int pos = myFind(docStr.toLowerCase(),"meta"); pos = docStr.toLowerCase().indexOf(oldkeywords,pos); startpos=pos; endpos=startpos+oldkeywords.length()+1+newkeywords.length(); } else{ startpos=docStr.indexOf("',pos)+1; p2 = lt.indexOf('>',p2+1)+1; break; } pos = myFind(lt,"head"); if (pos>0) { p1 = lt.indexOf('>',pos)+1; p2 = p1; break; } aux="head"; // no head tag pos = myFind(lt,"html"); if (pos>0) { p1 = lt.indexOf('>',pos)+1; p2 = p1; break; } p1=0;p2=0; } sBuf.append(content.substring(0,p1)); if (aux!=null) sBuf.append("<"+aux+">\n"); sBuf.append(""+newtitle+"\n"); if (aux!=null) sBuf.append("\n"); sBuf.append(content.substring(p2)); DisplayPanel.setText(sBuf.toString()); Document displayDoc =DisplayPanel.getDocument(); String docStr=new String(); try { docStr=displayDoc.getText(0,displayDoc.getLength()); }catch(Exception e){ } int startpos,endpos; pos = docStr.toLowerCase().indexOf(new String(""+newtitle)); startpos=pos+(new String("<title>")).length(); endpos=startpos+(new String(newtitle)).length(); DisplayPanel.setCaretPosition(0); DisplayPanel.select(startpos,endpos); 
DisplayPanel.setEditable(true); //DisplayPanel.setSelectionColor(Color.gray); //DisplayPanel.setSelectedTextColor(Color.red) ; DisplayPanel.grabFocus(); repaint(); } }//GEN-LAST:event_TitleButtonActionPerformed private int myFind(String str, String s) { int len = s.length(); int pos=0; while ((pos = str.indexOf(s,pos))>0) { int p1 = str.lastIndexOf('<',pos); if (p1<0) {pos+=len;continue;} String tmp = str.substring(p1+1,pos).trim(); if (tmp.equals("")) break; pos+=len; } return pos; } private void UrlListMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_UrlListMouseClicked // TODO add your handling code here: //if the list is empty, do nothing. // if(hrefModel.size()==0) // return; // // String s; // // setCursor( Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR) ); // // s = (String)UrlList.getSelectedValue(); // currentURL = getUrlByListStr(s); // // updateDisplay(currentURL); // // // setCursor( Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR) ); }//GEN-LAST:event_UrlListMouseClicked private void SaveReportButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SaveReportButtonActionPerformed // TODO add your handling code here: reportFileChooser.addChoosableFileFilter(new HtmlFilter()); int returnVal = reportFileChooser.showSaveDialog(SiteTool.this); if (returnVal == JFileChooser.APPROVE_OPTION) { File file = reportFileChooser.getSelectedFile(); try { PrintWriter dataFile = new PrintWriter(new FileWriter(file)); dataFile.println("<html><head><title>Spider Result"); dataFile.println("

Spider Result of "+seedURL+"

"); for (Iterator it=urlHs.iterator();it.hasNext();){ dataFile.println("

"+((String)it.next())+"

"); } dataFile.println(""); dataFile.close(); } catch(Exception ee) { ee.printStackTrace(); } } }//GEN-LAST:event_SaveReportButtonActionPerformed private void SourceRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SourceRadioButtonActionPerformed // TODO add your handling code here: if(this.displayType!=2){ this.displayType=2; updateDisplay(currentURL); } }//GEN-LAST:event_SourceRadioButtonActionPerformed private void SummaryRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SummaryRadioButtonActionPerformed // TODO add your handling code here: if(this.displayType!=1){ this.displayType=1; updateDisplay(currentURL); } }//GEN-LAST:event_SummaryRadioButtonActionPerformed private void ExplorerRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ExplorerRadioButtonActionPerformed if(this.displayType!=0){ this.displayType=0; updateDisplay(currentURL); } }//GEN-LAST:event_ExplorerRadioButtonActionPerformed private void SortComboBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SortComboBoxActionPerformed // TODO add your handling code here: int newvalue=SortComboBox.getSelectedIndex(); // System.out.println("new value="+newvalue); if( this.sortbyType == newvalue){ return; } else{ this.sortbyType=newvalue; if(resultList.size()!=0){ sortUrl(this.sortbyType); generateList(); //pack(); show(); } } }//GEN-LAST:event_SortComboBoxActionPerformed private void StartButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_StartButtonActionPerformed // TODO add your handling code here: MaxPages= Integer.valueOf(MaxPagesTextField.getText()).intValue(); if(MaxPages<=0){ statusDisplay("Invalid MaxPages!"); return; } else{ // statusDisplay("Crawling......Please Wait......"); // repaint(); getPages(); statusDisplay("Finished Crawling."); } }//GEN-LAST:event_StartButtonActionPerformed /**Write s to statusbar */ private void statusDisplay(String s){ 
this.StatusLabel.setText(s); } /**Display URL content in the DisplayPanel *type=0:html; 1:image; */ private void explorerDisplay(String url, int type){ StyledEditorKit sek = new StyledEditorKit(); DisplayPanel.setEditorKit(sek); DisplayPanel.setContentType("text/plain"); DisplayPanel.setContentType("text/html"); DisplayPanel.setEditable(false); if(type==0){ try{ DisplayPanel.setPage(url); DisplayPanel.setCaretPosition(0); }catch(IOException e){ System.out.println("explorer display ioexception: "+url); } catch(Exception ee) { System.out.println("explorer display other exception: "+url); ee.printStackTrace(); } } else if(type==1){ StringBuffer sBuf = new StringBuffer(); if(!urlHt.containsKey(url)){ System.out.println("url not exist in urlHt: "+url); return; } urlDetail tempDetail=(urlDetail)urlHt.get(url); sBuf.append(tempDetail.getDisplayText()); DisplayPanel.setText(sBuf.toString()); } } private void summaryDisplay(String url, int type){ //image or not is processed in urlDetail.getDisplayText, //so don't worry here; StringBuffer sBuf = new StringBuffer(); if(!urlHt.containsKey(url)){ System.out.println("url not exist in urlHt: "+url); return; } DisplayPanel.setContentType("text/html"); DisplayPanel.setEditable(false); urlDetail tempDetail=(urlDetail)urlHt.get(url); sBuf.append(tempDetail.getDisplayText()); DisplayPanel.setText(sBuf.toString()); DisplayPanel.setCaretPosition(0); } /**Display Source */ private void sourceDisplay(String url, int type){ StyledEditorKit sek = new StyledEditorKit(); DisplayPanel.setEditorKit(sek); DisplayPanel.setContentType("text/plain"); if(type==0){ DisplayPanel.setEditable(true); String text = fetchURL(url); if (text!=null) { DisplayPanel.setText(text); DisplayPanel.setCaretPosition(0); } } else if(type==1){ DisplayPanel.setEditable(false); DisplayPanel.setText("Image file can not be displayed in source mode.\n Please change to other modes;"); } } private void displayBadLink(String url){ DisplayPanel.setContentType("text/html"); 
DisplayPanel.setEditable(false); StringBuffer sBuf = new StringBuffer(); if(!urlHt.containsKey(url)){ System.out.println("url not exist in urlHt: "+url); return; } // sBuf.append("This is a bad link:"+url); sBuf.append(""+((urlDetail)urlHt.get(url)).getDisplayText()); DisplayPanel.setText(sBuf.toString()); } /** Update DiaplayPanel */ private void updateDisplay(String url){ statusDisplay(url); int type=0;//html try{ urlDetail tempDetail=(urlDetail)urlHt.get(url); if(tempDetail.isBadLink()){ //bad link displayBadLink(url); show(); return; } else if(tempDetail.isImage()){ type=1;//image } }catch(NullPointerException e){ System.out.println("updateDisplay: url not exist in urlHt ["+url+"]"); } if(displayType==0){//explorer explorerDisplay(url,type); } else if(displayType==1){ summaryDisplay(url,type); } else if(displayType==2){ sourceDisplay(url,type); } else{ } show(); } /** * @param args the command line arguments */ public static void main(String args[]) { try { UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName()); } catch(Exception e) { e.printStackTrace(); } java.awt.EventQueue.invokeLater(new Runnable() { public void run() { new SiteTool().setVisible(true); } }); } // Variables declaration - do not modify//GEN-BEGIN:variables private javax.swing.JPanel BottomPanel; private javax.swing.ButtonGroup DisplayButtonGroup; private javax.swing.JTextPane DisplayPanel; private javax.swing.JRadioButton ExplorerRadioButton; private javax.swing.JButton KeywordButton; private javax.swing.JLabel MaxPagesLabel; private javax.swing.JTextField MaxPagesTextField; private javax.swing.JButton SaveButton; private javax.swing.JButton SaveReportButton; private javax.swing.JCheckBox SiteCheckBox; private javax.swing.JComboBox SortComboBox; private javax.swing.JPanel SortPanel; private javax.swing.JLabel SortbyLabel; private javax.swing.JRadioButton SourceRadioButton; private javax.swing.JSplitPane SplitPane; private javax.swing.JButton StartButton; private 
javax.swing.JLabel StatusLabel;
    private javax.swing.JPanel StatusPanel;
    private javax.swing.JRadioButton SummaryRadioButton;
    private javax.swing.JButton TitleButton;
    private javax.swing.JLabel ToolsLabel;
    private javax.swing.JPanel ToolsPanel;
    private javax.swing.JPanel TopPanel;
    private javax.swing.JLabel UrlLabel;
    private javax.swing.JList UrlList;
    private javax.swing.JPanel UrlPanel;
    private javax.swing.JTextField UrlTextField;
    private javax.swing.JScrollPane jScrollPaneLeft;
    private javax.swing.JScrollPane jScrollPaneRight;
    private javax.swing.JSeparator jSeparator1;
    private javax.swing.JSeparator jSeparator2;
    private javax.swing.JSeparator jSeparator3;
    private javax.swing.JSeparator jSeparator4;
    private javax.swing.JSeparator jSeparator5;
    private javax.swing.JFileChooser reportFileChooser;
    // End of variables declaration//GEN-END:variables
}

/**
 * Record kept for a single URL: its status code, size, last-modified time,
 * head information (title, keywords, meta) and three sets of related links.
 */
class urlDetail extends Object implements Cloneable{
    // The URL this record describes.
    private String selfUrl;
    // Link sets: fromHs is filled by addFrom, toHs by addTo, errHs by setErrHs.
    private HashSet fromHs, toHs, errHs;
    // statusCode and size are set by updateState. fromSize/toSize/errSize are
    // cached set sizes that are refreshed only by updateNumbers().
    private int statusCode, fromSize, toSize, errSize, size;
    // Set by updateState; passed to new Date(...) when formatted.
    private long lastMod;
    // Set together by updateHead.
    private String title, meta, keywords;

    /**
     * Returns a copy made by Object.clone(). No field is copied explicitly
     * here, so the copy refers to the same HashSet instances as this object.
     *
     * @return the copy, or null if CloneNotSupportedException is thrown
     */
    public Object clone() {
        Object o = null;
        try {
            o = super.clone();
        } catch (CloneNotSupportedException e) {
            System.out.println("urlDetail can't clone.");
        }
        return o;
    }

    /** Adds a URL to the out-link set (toHs). */
    public void addTo(String to){
        toHs.add(to);
    }

    /** Adds a URL to the in-link set (fromHs). */
    public void addFrom(String from){
        fromHs.add(from);
    }

    /**
     * Creates an empty record for the given URL: empty link sets, zero
     * status/size/counters and empty head strings.
     *
     * @param self the URL this record describes; must not be null
     */
    public urlDetail(String self){
        //initialize
        this.selfUrl=new String(self);
        fromHs = new HashSet();
        toHs = new HashSet();
        errHs = new HashSet();
        statusCode = 0;
        size = 0;
        lastMod = 0;
        fromSize = 0;
        toSize = 0;
        errSize=0;
        title=new String();
        meta=new String();
        keywords=new String();
    }

    /** Creates an empty record whose URL is the empty string. */
    public urlDetail(){
        this("");
    }

    /** Returns the out-link set itself (not a copy). */
    public HashSet getToHs(){
        return toHs;
    }

    /** Returns the status code stored by updateState (0 until then). */
    public int getStatus(){
        return this.statusCode;
    }

    /** Returns the title stored by updateHead. */
    public String getTitle(){
        return this.title;
    }

    /** Returns the keywords stored by updateHead. */
    public String getKeywords(){
        return this.keywords;
    }

    /**
     * Replaces the error-link set with a clone of the given set.
     *
     * @param hs the set to copy; must not be null
     */
    public void setErrHs(HashSet hs){
        errHs=(HashSet)hs.clone();
    }

    /**
     * Refreshes the cached counters from the current sizes of the three link
     * sets. getIn/getOut/getErr, toString and getDisplayText report these
     * cached values, not the live set sizes.
     */
    public void updateNumbers(){
        fromSize=fromHs.size();
        toSize=toHs.size();
        errSize=errHs.size();
    }

    /**
     * Reports whether the URL looks like an image: true when ".jpg", ".gif",
     * ".JPG" or ".GIF" occurs anywhere in the URL (not only at the end, and
     * not in other letter-case mixes).
     */
    public boolean isImage(){
        //image
        //in future, should modify as .jpg appear in the end of url.
        if(selfUrl.indexOf(".jpg")>=0||selfUrl.indexOf(".gif")>=0||selfUrl.indexOf(".JPG")>=0||selfUrl.indexOf(".GIF")>=0){
            return true;
        }
        else{
            return false;
        }
    }

    /** Reports whether the link is bad, i.e. its status code is not 200. */
    public boolean isBadLink(){
        if(this.statusCode==200){
            return false;
        }
        else{
            return true;
        }
    }

    /**
     * Stores the fetch result for this URL.
     *
     * @param stateCode status code to store
     * @param size      size to store (labelled "Bytes" in getDisplayText)
     * @param lastMod   last-modified time, later passed to new Date(long)
     */
    public void updateState(int stateCode,int size, long lastMod){
        this.statusCode=stateCode;
        this.size=size;
        this.lastMod=lastMod;
    }

    /** Stores the title, keywords and meta strings for this URL. */
    public void updateHead(String title,String keywords,String meta){
        this.title=title;
        this.keywords=keywords;
        this.meta=meta;
    }

    /**
     * Builds the text shown for this URL. A bad link gets a leading
     * "This is a bad link!" notice; an image gets only its URL line; any
     * other URL gets URL, title, keywords, meta, size, last-modified and the
     * three link lists with their cached counts.
     *
     * NOTE(review): several literals below contain raw line breaks and odd
     * "1." / "2." fragments; this looks like markup that was lost from the
     * strings. They are kept exactly as found - confirm against a pristine
     * copy before editing them.
     *
     * @return the display text, never null
     */
    public String getDisplayText(){
        StringBuffer sBuf=new StringBuffer();
        if(isBadLink()){
            sBuf.append( "

This is a bad link!

");
        }
        //image
        if(isImage()){
            sBuf.append("URL: ["+selfUrl+"]");
            sBuf.append("
");
            return sBuf.toString();
        }
        sBuf.append("URL: ["+selfUrl+"]");
        sBuf.append("
Title: ["+title+"]");
        sBuf.append("
KeyWords: ["+keywords+"]");
        sBuf.append("
Meta: ["+meta+"]");
        sBuf.append("
Size: ["+size+" Bytes]");
        sBuf.append("
Last Modified: ["+new Date(lastMod)+"]");
        sBuf.append("
In Links: ["+fromSize+"]
    ");
        // One entry per in-link.
        for(Iterator it=this.fromHs.iterator();it.hasNext();){
            sBuf.append("
  1. "+(String)it.next()+"
  2. ");
        }
        sBuf.append("
");
        sBuf.append("Out Links: ["+toSize+"]
    ");
        // One entry per out-link.
        for(Iterator it=this.toHs.iterator();it.hasNext();){
            sBuf.append("
  1. "+(String)it.next()+"
  2. ");
        }
        sBuf.append("
");
        sBuf.append("Error Links: ["+errSize+"]
    ");
        // One entry per error link.
        for(Iterator it=this.errHs.iterator();it.hasNext();){
            sBuf.append("
  1. "+(String)it.next()+"
  2. ");
        }
        sBuf.append("
");
        //
        return sBuf.toString();
    }

    /** Returns the URL this record describes. */
    public String getUrl(){
        return this.selfUrl;
    }

    /** Returns the cached in-link count (see updateNumbers). */
    public int getIn(){
        return this.fromSize;
    }

    /** Returns the cached out-link count (see updateNumbers). */
    public int getOut(){
        return this.toSize;
    }

    /** Returns the cached error-link count (see updateNumbers). */
    public int getErr(){
        return this.errSize;
    }

    /** Returns the last-modified value stored by updateState. */
    public long getLastmod(){
        return this.lastMod;
    }

    /** Returns the size stored by updateState. */
    public int getSize(){
        return this.size;
    }

    /**
     * Formats this record as one line: URL, state, in/out/error counts, size
     * and last-modified date (left empty when lastMod is not positive),
     * ending with a newline.
     *
     * NOTE(review): the leading empty literal is kept exactly as found; it
     * may have held markup that was lost - confirm before changing.
     */
    public String toString() {
        String s=""+this.selfUrl+" state:["+this.statusCode+"] in:["+fromSize+"] out:["+toSize+ "] error:["+errSize+"] size:["+size+"] lastMod:[";
        if (lastMod>0) {
            s=s+(new Date(lastMod));
        }
        s+="]\n";
        return s;
    }
}

/** Orders urlDetail objects by URL string, ascending (String.compareTo). */
class urlComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        String url1=((urlDetail)o1).getUrl();
        String url2=((urlDetail)o2).getUrl();
        return url1.compareTo(url2);
    }
}

/** Orders urlDetail objects by status code, larger codes first. */
class stateComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        int state1=((urlDetail)o1).getStatus();
        int state2=((urlDetail)o2).getStatus();
        return (state2-state1);
    }
}

/** Orders urlDetail objects by cached in-link count, larger counts first. */
class inComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        return ((urlDetail)o2).getIn()-((urlDetail)o1).getIn();
    }
}

/** Orders urlDetail objects by size, larger sizes first. */
class sizeComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        return ((urlDetail)o2).getSize()-((urlDetail)o1).getSize();
    }
}

/** Orders urlDetail objects by cached out-link count, larger counts first. */
class outComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        return ((urlDetail)o2).getOut()-((urlDetail)o1).getOut();
    }
}

/** Orders urlDetail objects by cached error-link count, larger counts first. */
class errComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        return ((urlDetail)o2).getErr()-((urlDetail)o1).getErr();
    }
}

/** Orders urlDetail objects by last-modified value, larger values first. */
class lastmodComparer implements Comparator {
    public int compare(Object o1, Object o2) {
        long l=((urlDetail)o1).getLastmod()-((urlDetail)o2).getLastmod();
        // The long difference is mapped to -1/0/1 rather than cast to int.
        if(l>0 ) return -1;
        else if(l==0) return 0;
        else return 1;
    }
}

/** File chooser filter that shows directories and files ending in .html or .htm. */
class HtmlFilter extends javax.swing.filechooser.FileFilter {
    // Extensions accepted by this filter, compared in lower case.
    final static String html = "html";
    final static String htm = "htm";

    // Accept all directories and all html,htm files.
public boolean accept(File f) { if (f.isDirectory()) { return true; } String s = f.getName(); int i = s.lastIndexOf('.'); if (i > 0 && i < s.length() - 1) { String extension = s.substring(i+1).toLowerCase(); if (htm.equals(extension) || html.equals(extension) ) { return true; } else { return false; } } return false; } // The description of this filter public String getDescription() { return "HTML Files"; } }