/*
* SiteTool.java
*
* Created on April 28, 2005, 1:18 PM
*/
/**
*
* @author Administrator
*/
import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import java.awt.*; //for basic events
import java.io.*; //for io exception
import java.net.*; //for URLS
import java.util.*; //for hash
import javax.swing.JOptionPane; //for joptionpane
import javax.swing.event.*; //for hypertextlistener
import javax.swing.text.*;// for bad location exception
import javax.swing.text.html.*; //for html editor kit
import javax.swing.DefaultListModel; // model for the jList
import javax.swing.ImageIcon; //for image icon
import java.io.File;
import javax.swing.*;
import javax.swing.filechooser.*;
public class SiteTool extends javax.swing.JFrame {
/**
 * Creates the SiteTool window: seeds the default crawl options, builds the
 * generated Swing form and allocates the (initially empty) crawl collections.
 */
public SiteTool() {
    // initComponents() reads hrefModel (as the list model) and MaxPages (as
    // the initial text of the max-pages field), so both must exist first.
    this.hrefModel = new DefaultListModel();
    this.MaxPages = 30;      // default page limit
    this.IncludeImage = 0;   // 1 would make the link extractor follow <img> sources
    this.InsideSite = 1;     // 1 restricts the crawl to the seed's site

    initComponents();

    this.urlHs = new HashSet();
    this.urlList = new ArrayList();
    this.urlHt = new Hashtable();
    this.resultList = new ArrayList();

    // Place the URL list inside the left scroll pane's viewport.
    jScrollPaneLeft.getViewport().add(UrlList, null);
}
//urlHs guarantees that no URL is saved to urlList more than once
private HashSet urlHs;
//URLs in the order they were added (the seed first, then each newly found link)
private ArrayList urlList;
//key is each url, value is its urlDetail object
private Hashtable urlHt;
//sorted list of urlDetail (presumably filled by sortUrl(); not visible here)
private ArrayList resultList;
//List model backing the UrlList component.
//Original note: the spider goes through all URLs in this model to fetch new
//URLs and then saves them back to it; the loop is located in getPages().
private DefaultListModel hrefModel;
//PageIndex is reset to 0 when a crawl starts; MaxPages is the page limit
private int PageIndex, MaxPages;
//Both are 0/1 flags: IncludeImage==1 follows image sources, InsideSite==1 stays inside the seed site
private int IncludeImage, InsideSite;
//seedURL: start page; seedBase: seedURL up to the first '/' after the host; currentURL: page being processed or shown
private String seedURL, seedBase, currentURL;
//Scratch values of the most recent updateurlDetail() call: response code, content length, last-modified time
public int state;
public int size;
public long lastMod = 0;
//Title of the page whose head is being parsed
public String title;
private int displayType=1;//0:explorer; 1: summary; 2: source
private int sortbyType=0;//0-5: InLink, OutLink, ErrorLink, Last Modified, State, Size (index in SortComboBox)
/**
 * Extracts the URL from one display string of the URL list.
 *
 * A list entry carries more than the URL (in/out/error counts, state, ...).
 * The URL is taken as the text that starts 6 characters after the leading
 * marker and ends right before the first {@code " state:"}.
 *
 * @param listStr the list entry text; may be {@code null} when nothing is selected
 * @return the URL part, or a single space {@code " "} when the entry is
 *         {@code null}, has no {@code " state:"} marker, or is too short to
 *         contain a URL
 */
String getUrlByListStr(String listStr){
    if (listStr == null) {
        return " ";
    }
    // NOTE(review): the search text below is empty, so pos1 is always 0. It
    // looks like the original marker was lost from the source; confirm against
    // the code that builds the list strings before changing the offset of 6.
    int pos1 = listStr.indexOf("");
    int pos2 = listStr.indexOf(" state:");
    int urlStart = pos1 + 6;
    // Reject entries where the state marker precedes the URL start instead of
    // letting substring() throw StringIndexOutOfBoundsException.
    if (pos2 <= 0 || pos1 < 0 || urlStart > pos2) {
        return " ";
    }
    return listStr.substring(urlStart, pos2);
}
/**
 * Tells whether the text can be used as a crawl seed.
 *
 * @param url text to check
 * @return {@code true} only when the text begins with {@code "http://"}
 */
private boolean isValidUrl(String url){
    return url.startsWith("http://");
}
/**
 * Cuts a URL down to its scheme-and-host part.
 *
 * @param url a URL expected to start with "http://" (the search for the
 *            path separator begins at index 7, right after that prefix)
 * @return the text before the first '/' found from index 7 on, or the whole
 *         URL when there is no such '/'
 */
private String getSeedBase(String url){
    int slashAfterHost = url.indexOf("/", 7);
    return (slashAfterHost > 0) ? url.substring(0, slashAfterHost) : url;
}
//Starts the spider: reads the seed URL from UrlTextField, rejects it with a
//status message when it does not start with "http://", then resets PageIndex,
//hrefModel, urlHs, urlList and urlHt and registers the seed URL as the first
//entry of each crawl collection.
void getPages()
{
seedURL=UrlTextField.getText();
if(!isValidUrl(seedURL)){
statusDisplay("Seed URL is not valid!");
return;
}
seedBase=getSeedBase(seedURL);
PageIndex=0;
hrefModel.clear();
urlHs.clear();
urlList.clear();
urlHt.clear();
urlHs.add(seedURL);
urlList.add(seedURL);
urlHt.put(new String(seedURL), new urlDetail(seedURL));
//NOTE(review): the source is damaged from the next line on. The loop header
//is cut off ("PageIndex0" is not a valid expression) and the statements that
//follow use myText, p1, p2, meta and keywords, none of which is declared in
//the visible text. It looks like the rest of the crawl loop and the start of
//a separate routine that extracts title / meta / keywords from a page's text
//were lost, and two string literals further down are unterminated. Recover
//the original text before editing this region.
while (PageIndex0) {
p1 = myText.indexOf('>',p1);
p2 = myText.indexOf('<',p1);
if (p2>0) title = myText.substring(p1+1,p2).trim();
}
int m1,m2,m3;
m1 = myText.indexOf("0){
m3 = myText.indexOf('>',m1);
if(m3>0)
meta=myText.substring(m1+5,m3-1);
}
p1 = myText.indexOf("keywords");
while (p1>0) {
m1 = myText.lastIndexOf("',p1);
if (m3<0||m2>m3) break;
meta=myText.substring(p1+m1,p1+m3);
p1 = myText.indexOf('\"',m2);
if (p1<0) p1 = myText.indexOf('\'',m2);
if (p1<0) p1 = myText.indexOf('=',m2);
p2 = myText.indexOf('\"',p1+1);
if (p2<0) p2 = myText.indexOf('\'',p1+1);
if (p1>0&&p2>0) keywords = myText.substring(p1+1,p2).trim();
}
//save
//if(urlHt.containsKey(currentURL)){
try{ ((urlDetail)urlHt.get(currentURL)).updateHead(title,keywords,meta); }
catch(NullPointerException e){
System.out.println("currentURL not exist in urlHt:"+currentURL);
}
}
/**
 * Tells whether a URL refers to a JPEG or GIF image.
 *
 * The check is a plain substring match anywhere in the URL (all-lower-case
 * and all-upper-case spellings only); it is not anchored to the end.
 *
 * @param url URL to inspect
 * @return {@code true} if the URL contains ".jpg", ".gif", ".JPG" or ".GIF"
 */
boolean isImage(String url){
    String[] imageMarkers = { ".jpg", ".gif", ".JPG", ".GIF" };
    for (int k = 0; k < imageMarkers.length; k++) {
        if (url.indexOf(imageMarkers[k]) >= 0) {
            return true;
        }
    }
    return false;
}
/**
 * Parses the HTML text of a page and passes every link found in it to
 * gotURL(String, String).
 *
 * Links are taken from the SRC attribute of frame elements, from the SRC
 * attribute of img elements (only when IncludeImage is 1) and from the HREF
 * attribute of anchor tags. Relative links are resolved by gotURL against the
 * document's base, or against a base fabricated from SURL when the document
 * declares none.
 *
 * The wait cursor is shown while the method runs and is always restored,
 * including when parsing fails.
 *
 * @param SURL       URL the content was fetched from
 * @param contentStr HTML source of that page
 */
private void myGetLinks(String SURL,String contentStr)
{
    setCursor( Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR) );
    try {
        javax.swing.text.html.HTMLEditorKit htmlEditorKit = new javax.swing.text.html.HTMLEditorKit();
        HTMLDocument htmlDoc = new HTMLDocument();
        try{
            htmlEditorKit.read(new StringReader(contentStr), htmlDoc, 0);
        }
        catch (ChangedCharSetException e){
            // The parser stops at a charset directive. Retry with the text
            // that follows it (11 characters past the word "charset").
            int pos = contentStr.toLowerCase().indexOf("charset");
            int resume = pos + 11;
            // Only retry when the resume point really lies inside the text;
            // otherwise substring() would throw or start at a bogus offset.
            if (pos >= 0 && resume <= contentStr.length()) {
                try {
                    htmlEditorKit.read(new StringReader(contentStr.substring(resume)), htmlDoc, 0);
                }
                catch (IOException ie){
                    System.err.println("iread"+ie);
                }
                catch (BadLocationException ie){
                    System.err.println("iloc"+ie);
                }
            }
        }
        catch (BadLocationException e){
            System.err.println("bloc"+e);
        }
        catch (MalformedURLException murle){
            System.err.println("MURL"+murle);
        }
        catch (IOException e){
            System.err.println("IOE"+e);
        }
        catch (Exception ee){
            System.err.println("Other Exception"+ee);
            return; // the finally block below restores the cursor
        }

        // base = "http://......./.../"; fabricate one from SURL when absent
        URL base = htmlDoc.getBase();
        String sbase = (base == null) ? fabricatebase(SURL) : base.toString();

        // Sources of frames, and of images when image crawling is enabled.
        ElementIterator it = new ElementIterator(htmlDoc);
        javax.swing.text.Element elem;
        while ((elem = it.next()) != null){
            String tagName = elem.getName();
            boolean wanted = tagName.equalsIgnoreCase("frame")
                    || (IncludeImage == 1 && tagName.equalsIgnoreCase("img"));
            if (wanted) {
                String src = (String) elem.getAttributes().getAttribute(HTML.Attribute.SRC);
                if (src != null) {
                    gotURL(sbase, src);
                }
            }
        }

        // Targets of anchor tags.
        HTMLDocument.Iterator anchors = htmlDoc.getIterator(HTML.Tag.A);
        while (anchors.isValid()){
            String href = (String) anchors.getAttributes().getAttribute(HTML.Attribute.HREF);
            if (href != null){
                gotURL(sbase, href);
            }
            anchors.next();
        }
    }
    finally {
        setCursor( Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR) );
    }
}
/**
 * Handles one link found on the page currently being processed (currentURL).
 *
 * The link is normalized first; a {@code null} result (page limit reached or
 * link filtered out) is ignored. Links that are not HTML pages are only
 * recorded as outgoing links of currentURL. HTML links are queued for
 * crawling (added to urlHs and urlList) when they have not been seen before
 * and, if InsideSite is 1, lie under seedBase.
 *
 * @param sbase base URL used to resolve relative links
 * @param href  raw link text taken from the page
 */
private void gotURL(String sbase, String href){
    String absurl = normalizeURL(sbase, href);
    if (absurl == null) {
        return;
    }

    boolean excludedKind = absurl.indexOf(".pdf") >= 0 || absurl.indexOf(".ps") >= 0
            || absurl.indexOf(".xml") >= 0 || absurl.indexOf("mailto") >= 0;
    // A page counts as HTML when it contains ".htm" (which also covers
    // ".html"). The previous test joined the ".htm" and ".html" checks with
    // ||, which rejected every URL lacking ".html" and so never crawled
    // ".htm" pages.
    boolean looksLikeHtml = absurl.indexOf(".htm") > 0;
    if (excludedKind || !looksLikeHtml) {
        modifyFromTo(currentURL, absurl, 1); // only add to, not from
        return;
    }

    if (InsideSite == 1) { // stay inside the seed site
        if (absurl.startsWith(this.seedBase) && !urlHs.contains(absurl)) {
            urlHs.add(absurl);
            urlList.add(absurl);
            modifyFromTo(currentURL, absurl, 0); // add from & to
        }
        else { // outside the site, or already known
            // don't add a new urlDetail for this url
            modifyFromTo(currentURL, absurl, 1); // only add to, not from
        }
    }
    else { // pages outside the site are crawled too
        if (!urlHs.contains(absurl)) {
            urlHs.add(absurl);
            urlList.add(absurl);
            modifyFromTo(currentURL, absurl, 0);
        }
    }
}
/**
 * Turns a link found on a page into an absolute, cleaned-up URL.
 *
 * Steps: drop the fragment and the query, resolve the link against the base,
 * collapse "/../" and "/./" path segments and escape spaces as "%20".
 *
 * @param base base URL of the page, expected to end with '/'
 * @param in   raw link text
 * @return the normalized absolute URL, or {@code null} when urlList already
 *         holds MaxPages entries, when the link is an image and IncludeImage
 *         is 0, or when the link uses a scheme other than "http://"
 */
private String normalizeURL(String base, String in)
{
    if (urlList.size() >= MaxPages)
        return null;

    // Strip fragment, then query, from the same working copy. Cutting each
    // one from the raw input let the query cut undo the fragment cut for
    // links such as "page.html#top?x".
    String s = in;
    int i;
    if ((i = s.indexOf('#')) >= 0)
        s = s.substring(0, i);
    if ((i = s.indexOf('?')) >= 0)
        s = s.substring(0, i);

    // The decoded form is used only for the image-extension filter below.
    String decoded = in;
    if (in.indexOf('+') >= 0 || in.indexOf('%') >= 0) {
        try {
            decoded = URLDecoder.decode(in, "UTF-8");
        } catch (Exception ignored) {
            // Best effort: a malformed escape just leaves the raw text in use.
        }
    }
    if (IncludeImage == 0) {
        if (decoded.indexOf(".jpg") >= 0 || decoded.indexOf(".JPG") >= 0
                || decoded.indexOf(".gif") >= 0 || decoded.indexOf(".GIF") >= 0)
            return null;
    }

    s = s.trim();
    String absurl;
    if (s.startsWith("http://")) {
        absurl = s;
    }
    else {
        if (s.indexOf("://") > 0) return null; // file, ftp, gopher etc.
        String b = base.trim();
        if (s.startsWith("//")) {
            // scheme-relative link: reuse the only scheme this tool crawls
            absurl = "http:" + s;
        }
        else if (s.startsWith("/")) {
            // Root-relative link: resolve against scheme + host, not against
            // the page's directory (which produced "dir//path" URLs).
            int schemeEnd = b.indexOf("://");
            int hostEnd = b.indexOf('/', schemeEnd < 0 ? 0 : schemeEnd + 3);
            String root = (hostEnd < 0) ? b : b.substring(0, hostEnd);
            absurl = root + s;
        }
        else {
            absurl = b + s;
        }
    }

    // relative directory, parent level
    while ((i = absurl.indexOf("/../")) > 0) {
        String front = absurl.substring(0, i);
        int parent = front.lastIndexOf('/');
        if (parent < 0) break; // nothing left to climb out of
        absurl = front.substring(0, parent) + absurl.substring(i + 3);
    }
    // relative directory, self level
    while ((i = absurl.indexOf("/./")) > 0) {
        absurl = absurl.substring(0, i) + absurl.substring(i + 2);
    }
    // replace space with %20
    while ((i = absurl.indexOf(' ')) >= 0) {
        absurl = absurl.substring(0, i) + "%20" + absurl.substring(i + 1);
    }
    return absurl;
}
/**
 * Downloads the content behind a URL and returns it as text, one line per
 * line read, each terminated by the platform line separator.
 *
 * Failures are reported inside the returned text rather than thrown:
 * "Invalid URL" for a malformed URL, "Error reading URL" for an I/O error
 * and "ClassCastException: " plus the URL when the content is not delivered
 * as a stream. The method never returns {@code null}. Text is decoded with
 * the platform default charset.
 *
 * Original author's note: this routine works around character set changes;
 * it is not clear how it does so.
 *
 * @param urlString URL to read
 * @return the page text, or one of the error lines described above
 */
private static String fetchURL (String urlString)
{
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    BufferedReader in = null;
    try{
        URL url = new URL (urlString);
        java.io.InputStream content = (InputStream)url.getContent();
        in = new BufferedReader (new InputStreamReader (content));
        String line;
        while ((line = in.readLine()) != null) {
            pw.println (line);
        }
    }
    catch (MalformedURLException e) {
        pw.println ("Invalid URL");
    }
    catch (IOException e) {
        pw.println ("Error reading URL");
    }
    catch (ClassCastException cce) {
        pw.println ("ClassCastException: "+urlString);
    }
    finally {
        // Close the connection's reader even when reading failed half-way;
        // it used to be closed on the success path only.
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // nothing useful can be done if closing fails
            }
        }
        pw.close();
    }
    return sw.toString();
}// end of fetchURL
/**
 * Derives a base URL (ending in '/') from a page URL.
 *
 * @param s page URL
 * @return everything up to and including the last '/', provided that '/'
 *         sits at index 7 or later (i.e. past an "http://" prefix);
 *         otherwise the URL itself with '/' appended
 */
private String fabricatebase(String s)
{
    int lastSlash = s.lastIndexOf('/');
    if (lastSlash >= 7) {
        return s.substring(0, lastSlash + 1);
    }
    return s + '/';
}
/**
 * Records a link between two pages in urlHt.
 *
 * The source page always gets toURL added to its outgoing links. When type
 * is 0 the target page additionally gets fromURL added to its incoming
 * links. A urlDetail entry is created and stored for either page if urlHt
 * has none yet.
 *
 * @param fromURL page that contains the link
 * @param toURL   page the link points to
 * @param type    0 = update both sides; any other value = source side only
 */
private void modifyFromTo(String fromURL, String toURL,int type){
    // source side: remember the outgoing link
    urlDetail source = (urlDetail) urlHt.get(fromURL);
    boolean sourceIsNew = (source == null);
    if (sourceIsNew) {
        source = new urlDetail(fromURL);
    }
    source.addTo(toURL);
    if (sourceIsNew) {
        urlHt.put(fromURL, source);
    }

    if (type != 0) {
        return;
    }

    // target side: remember the incoming link
    urlDetail target = (urlDetail) urlHt.get(toURL);
    boolean targetIsNew = (target == null);
    if (targetIsNew) {
        target = new urlDetail(toURL);
    }
    target.addFrom(fromURL);
    if (targetIsNew) {
        urlHt.put(toURL, target);
    }
}
/**
 * Opens a connection to the URL, stores its response code, content length
 * and last-modified time in the state / size / lastMod fields and writes
 * them to the URL's entry through writeurlState.
 *
 * On failure the state is -1 (malformed URL or unexpected error), 404 (file
 * not found) or the three digits following "response code: " in the
 * exception text when present. size and lastMod are reset at the start of
 * every call, so a failed URL is recorded with 0 for both instead of the
 * values left over from the previously checked page.
 *
 * @param url URL to check
 * @return the open connection when the response code is 200, otherwise {@code null}
 */
private URLConnection updateurlDetail(String url){
    URLConnection con = null;
    boolean failed = false;
    size = 0;
    lastMod = 0;
    try{
        con = new URL(url).openConnection();
        state = 200; // used as-is for non-HTTP connections
        lastMod = con.getLastModified();
        size = con.getContentLength();
        if (con instanceof HttpURLConnection) {
            state = ((HttpURLConnection) con).getResponseCode();
        }
    }
    catch (MalformedURLException e) {
        failed = true;
        state = -1;
        System.err.println("Invalid URL "+e+" "+url);
    }
    catch (FileNotFoundException e) {
        failed = true;
        state = 404;
        System.err.println("Error reading URL 404 "+e+" "+url);
    }
    catch (IOException e) {
        failed = true;
        // Recover the HTTP status from the exception text when it has one.
        String text = e.toString();
        int p1 = text.indexOf("response code: ");
        try {
            state = (p1 > 0) ? Integer.parseInt(text.substring(p1+15, p1+18)) : -1;
        }
        catch (Exception ee) {
            state = -1;
        }
        System.err.println("Error reading URL "+e+" "+url);
    }
    catch (Throwable e) {
        failed = true;
        state = -1;
        System.err.println("Error "+e+" "+url);
    }
    writeurlState(url, state, size, lastMod);
    return (failed || state != 200) ? null : con;
}
/**
 * Stores the fetch result of a URL in its urlHt entry.
 *
 * The URL is expected to be registered in urlHt already; when it is not
 * (or is {@code null}) a diagnostic line is printed and nothing is stored.
 *
 * @param theUrl  URL whose entry is updated
 * @param st      response state / code
 * @param sz      content length
 * @param lastmod last-modified time
 */
public void writeurlState(String theUrl, int st, int sz, long lastmod){
    // Look the entry up explicitly instead of relying on a caught
    // NullPointerException, which would also hide failures inside updateState.
    urlDetail detail = (theUrl == null) ? null : (urlDetail) urlHt.get(theUrl);
    if (detail == null) {
        System.out.println("currentURL not exist in urlHt when writeurlState:"+theUrl);
        return;
    }
    detail.updateState(st, sz, lastmod);
}
/** This method is called from within the constructor to
* initialize the form.
* WARNING: Do NOT modify this code. The content of this method is
* always regenerated by the Form Editor.
*/
// //GEN-BEGIN:initComponents
private void initComponents() {
DisplayButtonGroup = new javax.swing.ButtonGroup();
reportFileChooser = new javax.swing.JFileChooser();
TopPanel = new javax.swing.JPanel();
UrlPanel = new javax.swing.JPanel();
UrlLabel = new javax.swing.JLabel();
UrlTextField = new javax.swing.JTextField();
jSeparator3 = new javax.swing.JSeparator();
jSeparator5 = new javax.swing.JSeparator();
SiteCheckBox = new javax.swing.JCheckBox();
jSeparator4 = new javax.swing.JSeparator();
MaxPagesLabel = new javax.swing.JLabel();
MaxPagesTextField = new javax.swing.JTextField();
StartButton = new javax.swing.JButton();
SortPanel = new javax.swing.JPanel();
SortbyLabel = new javax.swing.JLabel();
SortComboBox = new javax.swing.JComboBox();
jSeparator1 = new javax.swing.JSeparator();
ExplorerRadioButton = new javax.swing.JRadioButton();
SummaryRadioButton = new javax.swing.JRadioButton();
SourceRadioButton = new javax.swing.JRadioButton();
jSeparator2 = new javax.swing.JSeparator();
SaveReportButton = new javax.swing.JButton();
BottomPanel = new javax.swing.JPanel();
SplitPane = new javax.swing.JSplitPane();
jScrollPaneLeft = new javax.swing.JScrollPane();
UrlList = new javax.swing.JList();
jScrollPaneRight = new javax.swing.JScrollPane();
DisplayPanel = new javax.swing.JTextPane();
ToolsPanel = new javax.swing.JPanel();
ToolsLabel = new javax.swing.JLabel();
TitleButton = new javax.swing.JButton();
KeywordButton = new javax.swing.JButton();
SaveButton = new javax.swing.JButton();
StatusPanel = new javax.swing.JPanel();
StatusLabel = new javax.swing.JLabel();
getContentPane().setLayout(new java.awt.BorderLayout(0, 10));
setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
setTitle("SiteTool--Yefei Peng");
TopPanel.setLayout(new java.awt.GridLayout(2, 1, 0, 2));
TopPanel.setBorder(new javax.swing.border.EtchedBorder());
TopPanel.setMinimumSize(new java.awt.Dimension(400, 20));
TopPanel.setPreferredSize(new java.awt.Dimension(800, 60));
UrlPanel.setLayout(new javax.swing.BoxLayout(UrlPanel, javax.swing.BoxLayout.X_AXIS));
UrlLabel.setText("Seed URL");
UrlPanel.add(UrlLabel);
UrlTextField.setText("http://www.sis.pitt.edu/~spring/");
UrlTextField.setPreferredSize(new java.awt.Dimension(150, 23));
UrlTextField.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
UrlTextFieldActionPerformed(evt);
}
});
UrlPanel.add(UrlTextField);
jSeparator3.setOrientation(javax.swing.SwingConstants.VERTICAL);
UrlPanel.add(jSeparator3);
jSeparator5.setOrientation(javax.swing.SwingConstants.VERTICAL);
UrlPanel.add(jSeparator5);
SiteCheckBox.setSelected(true);
SiteCheckBox.setText("Inside Site");
SiteCheckBox.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SiteCheckBoxActionPerformed(evt);
}
});
UrlPanel.add(SiteCheckBox);
jSeparator4.setOrientation(javax.swing.SwingConstants.VERTICAL);
UrlPanel.add(jSeparator4);
MaxPagesLabel.setText("Max Pages");
UrlPanel.add(MaxPagesLabel);
MaxPagesTextField.setText(Integer.toString(MaxPages));
MaxPagesTextField.setMaximumSize(new java.awt.Dimension(50, 30));
MaxPagesTextField.setPreferredSize(new java.awt.Dimension(50, 23));
UrlPanel.add(MaxPagesTextField);
StartButton.setText("Start");
StartButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
StartButtonActionPerformed(evt);
}
});
UrlPanel.add(StartButton);
TopPanel.add(UrlPanel);
SortPanel.setLayout(new javax.swing.BoxLayout(SortPanel, javax.swing.BoxLayout.X_AXIS));
SortbyLabel.setText("Sort By ");
SortPanel.add(SortbyLabel);
SortbyLabel.getAccessibleContext().setAccessibleName("Sort By ");
SortComboBox.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "InLink", "OutLink", "ErrorLink", "Last Modified", "State", "Size" }));
SortComboBox.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SortComboBoxActionPerformed(evt);
}
});
SortPanel.add(SortComboBox);
jSeparator1.setOrientation(javax.swing.SwingConstants.VERTICAL);
SortPanel.add(jSeparator1);
DisplayButtonGroup.add(ExplorerRadioButton);
ExplorerRadioButton.setText("Explorer");
ExplorerRadioButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
ExplorerRadioButtonActionPerformed(evt);
}
});
SortPanel.add(ExplorerRadioButton);
DisplayButtonGroup.add(SummaryRadioButton);
SummaryRadioButton.setSelected(true);
SummaryRadioButton.setText("Summary");
SummaryRadioButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SummaryRadioButtonActionPerformed(evt);
}
});
SortPanel.add(SummaryRadioButton);
DisplayButtonGroup.add(SourceRadioButton);
SourceRadioButton.setText("Source");
SourceRadioButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SourceRadioButtonActionPerformed(evt);
}
});
SortPanel.add(SourceRadioButton);
jSeparator2.setOrientation(javax.swing.SwingConstants.VERTICAL);
SortPanel.add(jSeparator2);
SaveReportButton.setText("Save Report");
SaveReportButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SaveReportButtonActionPerformed(evt);
}
});
SortPanel.add(SaveReportButton);
TopPanel.add(SortPanel);
getContentPane().add(TopPanel, java.awt.BorderLayout.NORTH);
BottomPanel.setLayout(new java.awt.BorderLayout());
BottomPanel.setBorder(new javax.swing.border.EtchedBorder());
jScrollPaneLeft.setMinimumSize(new java.awt.Dimension(0, 200));
jScrollPaneLeft.setPreferredSize(new java.awt.Dimension(400, 800));
UrlList.setModel(hrefModel);
UrlList.setMaximumSize(new java.awt.Dimension(1000, 2000));
UrlList.setMinimumSize(new java.awt.Dimension(200, 400));
UrlList.setPreferredSize(new java.awt.Dimension(400, 600));
UrlList.addListSelectionListener(new javax.swing.event.ListSelectionListener() {
public void valueChanged(javax.swing.event.ListSelectionEvent evt) {
UrlListValueChanged(evt);
}
});
UrlList.addMouseListener(new java.awt.event.MouseAdapter() {
public void mouseClicked(java.awt.event.MouseEvent evt) {
UrlListMouseClicked(evt);
}
});
jScrollPaneLeft.setViewportView(UrlList);
SplitPane.setLeftComponent(jScrollPaneLeft);
jScrollPaneRight.setMinimumSize(new java.awt.Dimension(0, 200));
DisplayPanel.setEditable(false);
DisplayPanel.setMaximumSize(new java.awt.Dimension(1000, 2000));
DisplayPanel.setMinimumSize(new java.awt.Dimension(0, 200));
DisplayPanel.setPreferredSize(new java.awt.Dimension(400, 800));
jScrollPaneRight.setViewportView(DisplayPanel);
SplitPane.setRightComponent(jScrollPaneRight);
BottomPanel.add(SplitPane, java.awt.BorderLayout.CENTER);
ToolsPanel.setLayout(new javax.swing.BoxLayout(ToolsPanel, javax.swing.BoxLayout.Y_AXIS));
ToolsPanel.setBorder(new javax.swing.border.EtchedBorder());
ToolsPanel.setMinimumSize(new java.awt.Dimension(20, 200));
ToolsPanel.setPreferredSize(new java.awt.Dimension(90, 600));
ToolsLabel.setText("Editor Tools");
ToolsPanel.add(ToolsLabel);
TitleButton.setLabel("Change
Title");
TitleButton.setMaximumSize(new java.awt.Dimension(100, 40));
TitleButton.setMinimumSize(new java.awt.Dimension(80, 40));
TitleButton.setPreferredSize(new java.awt.Dimension(80, 40));
TitleButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
TitleButtonActionPerformed(evt);
}
});
ToolsPanel.add(TitleButton);
KeywordButton.setText("Change
Keyword");
KeywordButton.setMaximumSize(new java.awt.Dimension(100, 40));
KeywordButton.setMinimumSize(new java.awt.Dimension(100, 40));
KeywordButton.setPreferredSize(new java.awt.Dimension(80, 40));
KeywordButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
KeywordButtonActionPerformed(evt);
}
});
ToolsPanel.add(KeywordButton);
SaveButton.setText("Save");
SaveButton.setMaximumSize(new java.awt.Dimension(100, 27));
SaveButton.setMinimumSize(new java.awt.Dimension(80, 27));
SaveButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
SaveButtonActionPerformed(evt);
}
});
ToolsPanel.add(SaveButton);
BottomPanel.add(ToolsPanel, java.awt.BorderLayout.EAST);
getContentPane().add(BottomPanel, java.awt.BorderLayout.CENTER);
StatusPanel.setLayout(new javax.swing.BoxLayout(StatusPanel, javax.swing.BoxLayout.X_AXIS));
StatusPanel.setBorder(new javax.swing.border.EtchedBorder());
StatusLabel.setText("Status:");
StatusPanel.add(StatusLabel);
getContentPane().add(StatusPanel, java.awt.BorderLayout.SOUTH);
pack();
}
// //GEN-END:initComponents
private void UrlTextFieldActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_UrlTextFieldActionPerformed
    // An action event on the seed-URL field is handled exactly like a click
    // on the Start button.
    this.StartButtonActionPerformed(evt);
}//GEN-LAST:event_UrlTextFieldActionPerformed
/**
 * Shows the page belonging to the newly selected list entry.
 *
 * Nothing happens while the list is empty, while the selection is still
 * being adjusted (this avoids handling one click twice) or when the
 * selection has been cleared. The wait cursor is shown while the display is
 * refreshed and is always restored afterwards.
 *
 * @param evt the selection event delivered by UrlList
 */
private void UrlListValueChanged(javax.swing.event.ListSelectionEvent evt) {//GEN-FIRST:event_UrlListValueChanged
    if (hrefModel.size() == 0) {
        return;
    }
    if (evt != null && evt.getValueIsAdjusting()) {
        return;
    }
    // getSelectedValue() is null when the selection was cleared, e.g. while
    // the list is being rebuilt; there is no entry to parse in that case.
    Object selected = UrlList.getSelectedValue();
    if (selected == null) {
        return;
    }
    setCursor( Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR) );
    try {
        currentURL = getUrlByListStr((String) selected);
        updateDisplay(currentURL);
    }
    finally {
        setCursor( Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR) );
    }
}//GEN-LAST:event_UrlListValueChanged
private void SiteCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SiteCheckBoxActionPerformed
    // Each action on the check box flips the inside-site flag:
    // 1 becomes 0, anything else becomes 1.
    InsideSite = (InsideSite == 1) ? 0 : 1;
}//GEN-LAST:event_SiteCheckBoxActionPerformed
/**
 * Saves the current page to a file chosen by the user.
 *
 * In source mode (displayType 2) the possibly edited text of DisplayPanel is
 * saved; in the other modes the page is fetched again from currentURL.
 * A status message is shown and nothing is saved when no page is selected,
 * when the selected URL is unknown to urlHt, or when it is a bad link.
 *
 * @param evt the button's action event (not used)
 */
private void SaveButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SaveButtonActionPerformed
    // currentURL stays null until a list entry has been selected.
    if (currentURL == null || currentURL.equals("") || DisplayPanel.getText().equals("")) {
        this.statusDisplay("Choose a page first, please.");
        return;
    }
    urlDetail detail = (urlDetail) urlHt.get(currentURL);
    if (detail == null) {
        this.statusDisplay("Choose a page first, please.");
        return;
    }
    if (detail.isBadLink()) {
        this.statusDisplay("Bad Link. Choose a good page, please.");
        return;
    }

    String s;
    if (this.displayType != 2) { // fetch URL and save
        s = fetchURL(currentURL);
    }
    else {
        s = DisplayPanel.getText();
    }
    if (s == null) {
        statusDisplay("Content is empty.");
        return;
    }

    // Register the HTML filter once; adding it on every click piled up
    // duplicate entries in the chooser's filter list.
    boolean hasHtmlFilter = false;
    javax.swing.filechooser.FileFilter[] filters = reportFileChooser.getChoosableFileFilters();
    for (int k = 0; k < filters.length; k++) {
        if (filters[k] instanceof HtmlFilter) {
            hasHtmlFilter = true;
        }
    }
    if (!hasHtmlFilter) {
        reportFileChooser.addChoosableFileFilter(new HtmlFilter());
    }

    int returnVal = reportFileChooser.showSaveDialog(SiteTool.this);
    if (returnVal != JFileChooser.APPROVE_OPTION) {
        return;
    }
    File file = reportFileChooser.getSelectedFile();
    PrintWriter dataFile = null;
    try {
        dataFile = new PrintWriter(new FileWriter(file));
        dataFile.println(s);
    }
    catch (Exception ee) {
        ee.printStackTrace();
    }
    finally {
        // Release the file handle even when writing failed.
        if (dataFile != null) {
            dataFile.close();
        }
    }
}//GEN-LAST:event_SaveButtonActionPerformed
//Handler of the "Change Keyword" button. For the selected page it switches
//the display to source mode if necessary, asks the user for new keywords and
//rewrites the page source shown in DisplayPanel: existing keywords are
//replaced in place, otherwise new markup is inserted near the meta / head /
//html tag located with myFind.
//NOTE(review): this region of the file is damaged. Several string literals
//that presumably held HTML markup are empty or incomplete, and further down
//the text of this method runs straight into the tail of
//TitleButtonActionPerformed (see the GEN-LAST marker on the last line), so
//the end of this handler and the start of the title handler are missing.
//Recover the original text before editing this region.
private void KeywordButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_KeywordButtonActionPerformed
// TODO add your handling code here:
if(DisplayPanel.getText().equals("") || currentURL.equals("")){
this.statusDisplay("Choose a page first, please.");
return;
}
if((( urlDetail)urlHt.get(currentURL)).isBadLink()){
this.statusDisplay("Bad Link. Choose a good page, please.");
return;
}
String content;
//set source mode;
if(this.displayType!=2){
this.displayType=2;
SourceRadioButton.setSelected(true);
StyledEditorKit sek = new StyledEditorKit();
//DefaultEditorKit sek= new DefaultEditorKit();
DisplayPanel.setEditorKit(sek);
DisplayPanel.setContentType("text/plain");
content=fetchURL(currentURL);
}
else{
content=DisplayPanel.getText();
}
if (content==null) return;
DisplayPanel.setText("");
String oldkeywords=(( urlDetail)urlHt.get(currentURL)).getKeywords();
String newkeywords = (String)JOptionPane.showInputDialog(null,
"Old Keywords: "+oldkeywords+"\nType New Keywords Here: ",
"Change Keywords",JOptionPane.INFORMATION_MESSAGE,null,null,"");
if(newkeywords==null) return;
newkeywords=newkeywords.trim();
// String content=fetchURL(currentURL);
String lt = content.toLowerCase();
StringBuffer sBuf = new StringBuffer();
String aux=null;
int startpos=0, endpos=0;
if (!oldkeywords.equals("No Key Words")) {
int pos = myFind(lt,"meta");
pos = content.indexOf(oldkeywords,pos);
sBuf.append(content.substring(0,pos));
sBuf.append(" ");
sBuf.append(newkeywords);
sBuf.append(content.substring(pos+oldkeywords.length()+1));
}
else {
int p=-1,pos=0;
while (true) {
pos = myFind(lt,"meta");
if (pos>0) {p=lt.lastIndexOf('<',pos);break;}
pos = myFind(lt,"head");
if (pos>0) {p=lt.indexOf('>',pos)+1;break;}
aux="head"; // add head tag
pos = myFind(lt,"html");
if (pos>0) {p=lt.indexOf('>',pos)+1;break;}
p=0;
}
sBuf.append(content.substring(0,p));
if (aux!=null) sBuf.append("<"+aux+">\n");
sBuf.append("\n");
if (aux!=null) sBuf.append(""+aux+">\n");
sBuf.append(content.substring(p));
}
DisplayPanel.setText(sBuf.toString());
Document displayDoc =DisplayPanel.getDocument();
String docStr=new String();
try {
docStr=displayDoc.getText(0,displayDoc.getLength());
}catch(Exception e){
}
if (!oldkeywords.equals("No Key Words")) {
int pos = myFind(docStr.toLowerCase(),"meta");
pos = docStr.toLowerCase().indexOf(oldkeywords,pos);
startpos=pos;
endpos=startpos+oldkeywords.length()+1+newkeywords.length();
}
//NOTE(review): the first statement of the else branch below is cut off in
//the middle of a string literal; everything after it appears to belong to
//the title handler (it uses newtitle, p1 and p2, which are not declared in
//the visible text).
else{
startpos=docStr.indexOf("',pos)+1;
p2 = lt.indexOf('>',p2+1)+1;
break;
}
pos = myFind(lt,"head");
if (pos>0) {
p1 = lt.indexOf('>',pos)+1;
p2 = p1;
break;
}
aux="head"; // no head tag
pos = myFind(lt,"html");
if (pos>0) {
p1 = lt.indexOf('>',pos)+1;
p2 = p1;
break;
}
p1=0;p2=0;
}
sBuf.append(content.substring(0,p1));
if (aux!=null) sBuf.append("<"+aux+">\n");
sBuf.append(""+newtitle+"\n");
if (aux!=null) sBuf.append(""+aux+">\n");
sBuf.append(content.substring(p2));
DisplayPanel.setText(sBuf.toString());
Document displayDoc =DisplayPanel.getDocument();
String docStr=new String();
try {
docStr=displayDoc.getText(0,displayDoc.getLength());
}catch(Exception e){
}
int startpos,endpos;
pos = docStr.toLowerCase().indexOf(new String(""+newtitle));
startpos=pos+(new String("")).length();
endpos=startpos+(new String(newtitle)).length();
DisplayPanel.setCaretPosition(0);
DisplayPanel.select(startpos,endpos);
DisplayPanel.setEditable(true);
//DisplayPanel.setSelectionColor(Color.gray);
//DisplayPanel.setSelectedTextColor(Color.red) ;
DisplayPanel.grabFocus();
repaint();
}
}//GEN-LAST:event_TitleButtonActionPerformed
/**
 * Finds the first occurrence of a word that directly opens a tag, i.e. where
 * only whitespace (or nothing) lies between the nearest preceding '<' and
 * the word.
 *
 * @param str text to search
 * @param s   word to look for, e.g. "meta" or "head"
 * @return the index of the matching occurrence (always greater than 0);
 *         otherwise the last result of indexOf, which is -1 when the word
 *         does not occur any more and 0 when the text itself starts with it
 */
private int myFind(String str, String s) {
    int cursor = str.indexOf(s, 0);
    while (cursor > 0) {
        int open = str.lastIndexOf('<', cursor);
        if (open >= 0 && str.substring(open + 1, cursor).trim().equals("")) {
            return cursor;
        }
        // not a tag start: continue searching behind this occurrence
        cursor = str.indexOf(s, cursor + s.length());
    }
    return cursor;
}
private void UrlListMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_UrlListMouseClicked
// Intentionally empty: mouse clicks on the list trigger no action here.
// The handler used to do the following (kept for reference, disabled):
//   - return when hrefModel is empty
//   - show the wait cursor
//   - currentURL = getUrlByListStr((String)UrlList.getSelectedValue())
//   - updateDisplay(currentURL)
//   - restore the default cursor
}//GEN-LAST:event_UrlListMouseClicked
private void SaveReportButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SaveReportButtonActionPerformed
// TODO add your handling code here:
reportFileChooser.addChoosableFileFilter(new HtmlFilter());
int returnVal = reportFileChooser.showSaveDialog(SiteTool.this);
if (returnVal == JFileChooser.APPROVE_OPTION) {
File file = reportFileChooser.getSelectedFile();
try {
PrintWriter dataFile = new PrintWriter(new FileWriter(file));
dataFile.println("Spider Result");
dataFile.println("Spider Result of "+seedURL+"
");
for (Iterator it=urlHs.iterator();it.hasNext();){
dataFile.println(""+((String)it.next())+"
");
}
dataFile.println("");
dataFile.close();
}
catch(Exception ee) {
ee.printStackTrace();
}
}
}//GEN-LAST:event_SaveReportButtonActionPerformed
private void SourceRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SourceRadioButtonActionPerformed
    // Already in source mode (2): nothing to refresh.
    if (displayType == 2) {
        return;
    }
    displayType = 2;
    updateDisplay(currentURL);
}//GEN-LAST:event_SourceRadioButtonActionPerformed
private void SummaryRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SummaryRadioButtonActionPerformed
    // Already in summary mode (1): nothing to refresh.
    if (displayType == 1) {
        return;
    }
    displayType = 1;
    updateDisplay(currentURL);
}//GEN-LAST:event_SummaryRadioButtonActionPerformed
private void ExplorerRadioButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ExplorerRadioButtonActionPerformed
    // Already in explorer mode (0): nothing to refresh.
    if (displayType == 0) {
        return;
    }
    displayType = 0;
    updateDisplay(currentURL);
}//GEN-LAST:event_ExplorerRadioButtonActionPerformed
private void SortComboBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SortComboBoxActionPerformed
    // Re-sort and rebuild the list only when the chosen criterion actually
    // changed and there are results to sort.
    int chosen = SortComboBox.getSelectedIndex();
    if (chosen == this.sortbyType) {
        return;
    }
    this.sortbyType = chosen;
    if (resultList.size() == 0) {
        return;
    }
    sortUrl(this.sortbyType);
    generateList();
    show();
}//GEN-LAST:event_SortComboBoxActionPerformed
/**
 * Starts a crawl with the page limit typed into MaxPagesTextField.
 *
 * The limit must be a positive whole number; anything else (including
 * non-numeric text, which used to raise an uncaught NumberFormatException)
 * shows "Invalid MaxPages!" and leaves the previous limit untouched. An
 * invalid seed URL is reported without starting the crawl, so that the
 * message is not replaced by "Finished Crawling." right away.
 *
 * @param evt the triggering action event (not used)
 */
private void StartButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_StartButtonActionPerformed
    int requested;
    try {
        requested = Integer.parseInt(MaxPagesTextField.getText().trim());
    }
    catch (NumberFormatException e) {
        statusDisplay("Invalid MaxPages!");
        return;
    }
    if (requested <= 0) {
        statusDisplay("Invalid MaxPages!");
        return;
    }
    MaxPages = requested;

    // Same check and message getPages() applies to the seed URL.
    if (!isValidUrl(UrlTextField.getText())) {
        statusDisplay("Seed URL is not valid!");
        return;
    }
    getPages();
    statusDisplay("Finished Crawling.");
}//GEN-LAST:event_StartButtonActionPerformed
/**
 * Shows a message in the status bar.
 *
 * @param s text for the status label; replaces the previous text
 */
private void statusDisplay(String s){
    StatusLabel.setText(s);
}
/**
 * Explorer mode: renders a URL in DisplayPanel as HTML.
 *
 * @param url  URL to show
 * @param type 0 = HTML page, loaded directly from the URL;
 *             1 = image, for which the stored display text of the URL's
 *             urlHt entry is shown instead; other values show nothing
 */
private void explorerDisplay(String url, int type){
    DisplayPanel.setEditorKit(new StyledEditorKit());
    // The content type is set to plain text and then to HTML, in this order.
    DisplayPanel.setContentType("text/plain");
    DisplayPanel.setContentType("text/html");
    DisplayPanel.setEditable(false);

    switch (type) {
        case 0:
            try {
                DisplayPanel.setPage(url);
                DisplayPanel.setCaretPosition(0);
            }
            catch (IOException e) {
                System.out.println("explorer display ioexception: "+url);
            }
            catch (Exception ee) {
                System.out.println("explorer display other exception: "+url);
                ee.printStackTrace();
            }
            break;
        case 1:
            if (!urlHt.containsKey(url)) {
                System.out.println("url not exist in urlHt: "+url);
                return;
            }
            urlDetail imageDetail = (urlDetail) urlHt.get(url);
            String rendered = new StringBuffer().append(imageDetail.getDisplayText()).toString();
            DisplayPanel.setText(rendered);
            break;
        default:
            break;
    }
}
/**
 * Summary mode: shows the stored display text of a URL's urlHt entry as
 * read-only HTML, scrolled to the top. A URL without an entry only produces
 * a diagnostic line on standard output.
 *
 * @param url  URL whose summary is shown
 * @param type page kind; not used here because images are handled inside
 *             urlDetail.getDisplayText (per the original author's note)
 */
private void summaryDisplay(String url, int type){
    if (!urlHt.containsKey(url)) {
        System.out.println("url not exist in urlHt: "+url);
        return;
    }
    DisplayPanel.setContentType("text/html");
    DisplayPanel.setEditable(false);
    urlDetail entry = (urlDetail) urlHt.get(url);
    String summary = new StringBuffer().append(entry.getDisplayText()).toString();
    DisplayPanel.setText(summary);
    DisplayPanel.setCaretPosition(0);
}
/**
 * Source mode: shows the raw text of a page as editable plain text.
 *
 * @param url  URL whose source is fetched
 * @param type 0 = HTML page (fetched and shown, editable);
 *             1 = image (a fixed notice is shown, not editable);
 *             other values leave the text untouched
 */
private void sourceDisplay(String url, int type){
    DisplayPanel.setEditorKit(new StyledEditorKit());
    DisplayPanel.setContentType("text/plain");

    if (type == 1) {
        DisplayPanel.setEditable(false);
        DisplayPanel.setText("Image file can not be displayed in source mode.\n Please change to other modes;");
        return;
    }
    if (type != 0) {
        return;
    }
    DisplayPanel.setEditable(true);
    String pageSource = fetchURL(url);
    if (pageSource == null) {
        return;
    }
    DisplayPanel.setText(pageSource);
    DisplayPanel.setCaretPosition(0);
}
/**
 * Shows the description of a bad link in DisplayPanel.
 *
 * The pane is switched to text/html and made read-only before the lookup;
 * if the URL has no entry in urlHt a message is printed and the text is
 * left unchanged, otherwise the entry's getDisplayText() is shown.
 *
 * @param url the URL of the bad link
 */
private void displayBadLink(String url) {
    DisplayPanel.setContentType("text/html");
    DisplayPanel.setEditable(false);

    // Hashtable never stores null values, so a null result means the key
    // is absent.
    Object entry = urlHt.get(url);
    if (entry == null) {
        System.out.println("url not exist in urlHt: " + url);
        return;
    }
    DisplayPanel.setText(String.valueOf(((urlDetail) entry).getDisplayText()));
}
/**
 * Updates DisplayPanel for the given URL.
 *
 * The URL is written to the status bar, then looked up in urlHt:
 * a bad link is handed to displayBadLink() and nothing else is done;
 * an image entry selects type 1, anything else type 0;
 * a URL with no entry is reported on standard output and then displayed
 * with type 0.
 * The content is then rendered by explorerDisplay, summaryDisplay or
 * sourceDisplay according to displayType (0, 1 or 2; other values render
 * nothing), and the frame is made visible.
 *
 * The missing-entry case is detected with an explicit null check rather
 * than by catching NullPointerException, so an NPE raised inside
 * displayBadLink() is no longer swallowed and misreported as a missing URL.
 *
 * @param url the URL to display
 */
private void updateDisplay(String url) {
    statusDisplay(url);

    int type = 0; // html unless the entry says it is an image
    // Hashtable.get(null) throws, so a null URL is treated as "no entry".
    urlDetail detail = (url == null) ? null : (urlDetail) urlHt.get(url);
    if (detail == null) {
        System.out.println("updateDisplay: url not exist in urlHt [" + url + "]");
    } else if (detail.isBadLink()) {
        displayBadLink(url);
        setVisible(true);
        return;
    } else if (detail.isImage()) {
        type = 1; // image
    }

    if (displayType == 0) { // explorer
        explorerDisplay(url, type);
    } else if (displayType == 1) { // summary
        summaryDisplay(url, type);
    } else if (displayType == 2) { // source
        sourceDisplay(url, type);
    }
    // setVisible(true) replaces the deprecated show().
    setVisible(true);
}
/**
 * Program entry point: tries to install the system look and feel (a failure
 * is only printed), then creates the SiteTool frame and makes it visible
 * from a task queued on the AWT event queue.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String args[]) {
    try {
        String systemLookAndFeel = UIManager.getSystemLookAndFeelClassName();
        UIManager.setLookAndFeel(systemLookAndFeel);
    } catch (Exception e) {
        e.printStackTrace();
    }

    Runnable openWindow = new Runnable() {
        public void run() {
            SiteTool window = new SiteTool();
            window.setVisible(true);
        }
    };
    java.awt.EventQueue.invokeLater(openWindow);
}
// Swing component fields of the SiteTool form. The GEN-BEGIN/GEN-END markers
// and the "do not modify" note suggest this section is maintained by a GUI
// form editor -- confirm before editing any of it by hand.
// Variables declaration - do not modify//GEN-BEGIN:variables
private javax.swing.JPanel BottomPanel;
private javax.swing.ButtonGroup DisplayButtonGroup;
private javax.swing.JTextPane DisplayPanel;
private javax.swing.JRadioButton ExplorerRadioButton;
private javax.swing.JButton KeywordButton;
private javax.swing.JLabel MaxPagesLabel;
private javax.swing.JTextField MaxPagesTextField;
private javax.swing.JButton SaveButton;
private javax.swing.JButton SaveReportButton;
private javax.swing.JCheckBox SiteCheckBox;
private javax.swing.JComboBox SortComboBox;
private javax.swing.JPanel SortPanel;
private javax.swing.JLabel SortbyLabel;
private javax.swing.JRadioButton SourceRadioButton;
private javax.swing.JSplitPane SplitPane;
private javax.swing.JButton StartButton;
private javax.swing.JLabel StatusLabel;
private javax.swing.JPanel StatusPanel;
private javax.swing.JRadioButton SummaryRadioButton;
private javax.swing.JButton TitleButton;
private javax.swing.JLabel ToolsLabel;
private javax.swing.JPanel ToolsPanel;
private javax.swing.JPanel TopPanel;
private javax.swing.JLabel UrlLabel;
private javax.swing.JList UrlList;
private javax.swing.JPanel UrlPanel;
private javax.swing.JTextField UrlTextField;
private javax.swing.JScrollPane jScrollPaneLeft;
private javax.swing.JScrollPane jScrollPaneRight;
private javax.swing.JSeparator jSeparator1;
private javax.swing.JSeparator jSeparator2;
private javax.swing.JSeparator jSeparator3;
private javax.swing.JSeparator jSeparator4;
private javax.swing.JSeparator jSeparator5;
private javax.swing.JFileChooser reportFileChooser;
// End of variables declaration//GEN-END:variables
}
/**
 * Record kept for one URL: the URL itself, the sets of URLs registered
 * through addFrom(), addTo() and setErrHs(), the status code, size and
 * last-modified value supplied through updateState(), and the title,
 * keywords and meta text supplied through updateHead().
 *
 * The in/out/error counts reported by getIn(), getOut(), getErr(),
 * toString() and getDisplayText() are cached values that are refreshed only
 * by updateNumbers().
 */
class urlDetail extends Object implements Cloneable{
    /**
     * Line break emitted by getDisplayText(). The display methods of
     * SiteTool render that text in a text/html pane, where a bare newline
     * does not break the line, so an HTML break is emitted followed by a
     * newline.
     * NOTE(review): the original literals contained raw line breaks (which
     * do not compile); confirm "&lt;br&gt;" is the intended markup.
     */
    private static final String BR = "<br>\n";

    private String selfUrl;
    private HashSet fromHs, toHs, errHs;
    private int statusCode, fromSize, toSize, errSize, size;
    private long lastMod;
    private String title, meta, keywords;

    /**
     * Creates an empty record for the given URL: status 0, size 0, no
     * links, empty title/meta/keywords.
     *
     * @param self the URL this record describes; must not be null
     * @throws NullPointerException if self is null
     */
    public urlDetail(String self){
        if (self == null) {
            throw new NullPointerException("self");
        }
        this.selfUrl = self;
        fromHs = new HashSet();
        toHs = new HashSet();
        errHs = new HashSet();
        statusCode = 0;
        size = 0;
        lastMod = 0;
        fromSize = 0;
        toSize = 0;
        errSize = 0;
        title = "";
        meta = "";
        keywords = "";
    }

    /** Creates an empty record for the empty URL "". */
    public urlDetail(){
        this("");
    }

    /**
     * Returns a field-by-field copy made by Object.clone(). The copy is
     * shallow: the three link sets are shared with this instance.
     * NOTE(review): callers that modify the sets of a clone also modify the
     * original -- confirm that this sharing is intended.
     *
     * @return the copy, or null if cloning is reported as unsupported
     */
    public Object clone() {
        Object o = null;
        try {
            o = super.clone();
        }
        catch (CloneNotSupportedException e) {
            System.out.println("urlDetail can't clone.");
        }
        return o;
    }

    /** Adds a URL to the set of outgoing links. */
    public void addTo(String to){
        toHs.add(to);
    }

    /** Adds a URL to the set of incoming links. */
    public void addFrom(String from){
        fromHs.add(from);
    }

    /** Returns the live (not copied) set of outgoing links. */
    public HashSet getToHs(){
        return toHs;
    }

    /** Returns the status code last stored by updateState(). */
    public int getStatus(){
        return this.statusCode;
    }

    /** Returns the title last stored by updateHead(). */
    public String getTitle(){
        return this.title;
    }

    /** Returns the keywords last stored by updateHead(). */
    public String getKeywords(){
        return this.keywords;
    }

    /**
     * Replaces the error-link set with a copy of the given set. The cached
     * error count is not refreshed until updateNumbers() is called.
     *
     * @param hs the new error links; must not be null
     */
    public void setErrHs(HashSet hs){
        errHs = (HashSet) hs.clone();
    }

    /** Refreshes the cached in/out/error counts from the current sets. */
    public void updateNumbers(){
        fromSize = fromHs.size();
        toSize = toHs.size();
        errSize = errHs.size();
    }

    /**
     * Tells whether the URL looks like an image: it contains ".jpg", ".gif",
     * ".JPG" or ".GIF" anywhere in the string (not only at the end).
     */
    public boolean isImage(){
        return selfUrl.indexOf(".jpg") >= 0
            || selfUrl.indexOf(".gif") >= 0
            || selfUrl.indexOf(".JPG") >= 0
            || selfUrl.indexOf(".GIF") >= 0;
    }

    /** Tells whether the stored status code is anything other than 200. */
    public boolean isBadLink(){
        return this.statusCode != 200;
    }

    /** Stores the status code, size and last-modified value. */
    public void updateState(int stateCode,int size, long lastMod){
        this.statusCode = stateCode;
        this.size = size;
        this.lastMod = lastMod;
    }

    /** Stores the title, keywords and meta text. */
    public void updateHead(String title,String keywords,String meta){
        this.title = title;
        this.keywords = keywords;
        this.meta = meta;
    }

    /**
     * Builds the description shown for this URL.
     *
     * A bad link starts with a "This is a bad link!" line. For an image URL
     * only the URL line follows. Otherwise the URL, title, keywords, meta,
     * size, last-modified date and the three link lists (each headed by its
     * cached count) are listed.
     *
     * @return the description, with lines separated by an HTML break
     */
    public String getDisplayText(){
        StringBuffer sBuf = new StringBuffer();
        if (isBadLink()) {
            sBuf.append("This is a bad link!").append(BR);
        }
        if (isImage()) {
            sBuf.append("URL: [").append(selfUrl).append("]").append(BR);
            return sBuf.toString();
        }
        sBuf.append("URL: [").append(selfUrl).append("]");
        sBuf.append(BR).append("Title: [").append(title).append("]");
        sBuf.append(BR).append("KeyWords: [").append(keywords).append("]");
        sBuf.append(BR).append("Meta: [").append(meta).append("]");
        sBuf.append(BR).append("Size: [").append(size).append(" Bytes]");
        sBuf.append(BR).append("Last Modified: [").append(new Date(lastMod)).append("]");
        sBuf.append(BR).append("In Links: [").append(fromSize).append("]");
        appendLinks(sBuf, fromHs);
        sBuf.append("Out Links: [").append(toSize).append("]");
        appendLinks(sBuf, toHs);
        sBuf.append("Error Links: [").append(errSize).append("]");
        appendLinks(sBuf, errHs);
        return sBuf.toString();
    }

    /** Appends one "- url" line per link, then one empty line. */
    private static void appendLinks(StringBuffer out, HashSet links){
        for (Iterator it = links.iterator(); it.hasNext();) {
            out.append("- ").append((String) it.next()).append(BR);
        }
        out.append(BR);
    }

    /** Returns the URL this record describes. */
    public String getUrl(){
        return this.selfUrl;
    }

    /** Returns the cached number of incoming links. */
    public int getIn(){
        return this.fromSize;
    }

    /** Returns the cached number of outgoing links. */
    public int getOut(){
        return this.toSize;
    }

    /** Returns the cached number of error links. */
    public int getErr(){
        return this.errSize;
    }

    /** Returns the last-modified value stored by updateState(). */
    public long getLastmod(){
        return this.lastMod;
    }

    /** Returns the size stored by updateState(). */
    public int getSize(){
        return this.size;
    }

    /**
     * Returns a one-line summary ending in a newline. The last-modified date
     * is printed only when the stored value is greater than zero.
     */
    public String toString() {
        String s = "" + this.selfUrl + " state:[" + this.statusCode + "] in:[" + fromSize
            + "] out:[" + toSize + "] error:[" + errSize + "] size:[" + size + "] lastMod:[";
        if (lastMod > 0) {
            s = s + (new Date(lastMod));
        }
        s += "]\n";
        return s;
    }
}
/** Orders urlDetail objects by their URL string, using String.compareTo. */
class urlComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return the result of comparing o1's URL to o2's URL
     */
    public int compare(Object o1, Object o2) {
        return ((urlDetail) o1).getUrl().compareTo(((urlDetail) o2).getUrl());
    }
}
/** Orders urlDetail objects by status code, larger codes first. */
class stateComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return o2's status code minus o1's status code
     */
    public int compare(Object o1, Object o2) {
        final int first = ((urlDetail) o1).getStatus();
        return ((urlDetail) o2).getStatus() - first;
    }
}
/** Orders urlDetail objects by incoming-link count, larger counts first. */
class inComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return o2's in-link count minus o1's in-link count
     */
    public int compare(Object o1, Object o2) {
        final int second = ((urlDetail) o2).getIn();
        final int first = ((urlDetail) o1).getIn();
        return second - first;
    }
}
/** Orders urlDetail objects by size, larger sizes first. */
class sizeComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return o2's size minus o1's size
     */
    public int compare(Object o1, Object o2) {
        final int second = ((urlDetail) o2).getSize();
        final int first = ((urlDetail) o1).getSize();
        return second - first;
    }
}
/** Orders urlDetail objects by outgoing-link count, larger counts first. */
class outComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return o2's out-link count minus o1's out-link count
     */
    public int compare(Object o1, Object o2) {
        final int second = ((urlDetail) o2).getOut();
        final int first = ((urlDetail) o1).getOut();
        return second - first;
    }
}
/** Orders urlDetail objects by error-link count, larger counts first. */
class errComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return o2's error-link count minus o1's error-link count
     */
    public int compare(Object o1, Object o2) {
        final int second = ((urlDetail) o2).getErr();
        final int first = ((urlDetail) o1).getErr();
        return second - first;
    }
}
/** Orders urlDetail objects by last-modified value, larger values first. */
class lastmodComparer implements Comparator {
    /**
     * @param o1 first urlDetail
     * @param o2 second urlDetail
     * @return -1 if o1's last-modified value minus o2's is positive,
     *         0 if the difference is zero, 1 otherwise
     */
    public int compare(Object o1, Object o2) {
        final long delta = ((urlDetail) o1).getLastmod() - ((urlDetail) o2).getLastmod();
        if (delta == 0) {
            return 0;
        }
        return (delta > 0) ? -1 : 1;
    }
}
/**
 * File-chooser filter that accepts every directory and every file whose
 * extension is "html" or "htm", compared case-insensitively.
 */
class HtmlFilter extends javax.swing.filechooser.FileFilter {
    final static String html = "html";
    final static String htm = "htm";

    /**
     * Decides whether a file is shown.
     *
     * The extension is the text after the last '.' of the file name; a name
     * that starts with its only dot, ends with a dot, or has no dot at all
     * is treated as having no extension and is rejected.
     *
     * @param f the file or directory to test
     * @return true for directories and for *.htm / *.html files
     */
    public boolean accept(File f) {
        if (f.isDirectory()) {
            return true;
        }
        final String fileName = f.getName();
        final int dot = fileName.lastIndexOf('.');
        final boolean hasExtension = dot > 0 && dot < fileName.length() - 1;
        if (!hasExtension) {
            return false;
        }
        final String extension = fileName.substring(dot + 1).toLowerCase();
        return htm.equals(extension) || html.equals(extension);
    }

    /** Returns the description of this filter shown in the file chooser. */
    public String getDescription() {
        return "HTML Files";
    }
}