Home | Digital Edition | Sys-Con Radio | Search | Java CD
Advanced Java AWT Book Reviews/Excerpts Client Server Corba Editorials Embedded Java Enterprise Java IDE's Industry Watch Integration Interviews Java Applet Java & Databases Java & Web Services Java Fundamentals Java Native Interface Java Servlets Java Beans J2ME Libraries .NET Object Orientation Observations/IMHO Product Reviews Scalability & Performance Security Server Side Source Code Straight Talking Swing Threads Using Java with others Wireless XML
 

"Create a Distributed Intranet Search Mechanism
 Using Java Servlets"
Volume: 3 Issue: 7, p. 24

	

Listing 1: SearchManager class.
 
import java.io.*; 
import java.util.*; 
import java.lang.*; 
import javax.servlet.*; 
import javax.servlet.http.*; 

// THIS CLASS IS USED TO MANAGE THE SearchEngine THREADS  
// AND PROVIDE JAVA SERVLET FUNCTIONALITY  

/**
 * Servlet that fans a user query out to several search engines, waits for
 * all of them to answer, and merges the anchors they returned into one
 * HTML page built from a template file.
 *
 * <p>Two files are read from the directory named by the {@code user.dir}
 * system property:
 * <ul>
 *   <li>{@code searchurls.conf} - one search engine URL per line. Each URL
 *       must end with the engine's query parameter followed by an equal
 *       sign, e.g. {@code http://www.search.com/cgi-bin/search?query=}</li>
 *   <li>{@code searchservlet.pat} - the HTML template; the first
 *       {@code <<results>>} marker on a line is replaced by the merged
 *       result list.</li>
 * </ul>
 *
 * <p>Per-request state ({@link #query}, {@link #searchEngines}) is kept in
 * instance fields because the public {@link #initialize()} method reads it.
 * {@link #doGet} is therefore synchronized so that concurrent requests
 * cannot overwrite each other's query or result lists.
 */
public class SearchManager extends HttpServlet {
    /** The query criteria used by the search currently in progress. */
    private String query = null;

    /** The {@code SearchEngine} objects created for the current search. */
    private Vector searchEngines = new Vector();

    /** Thread group that owns the SearchEngine worker threads. */
    private ThreadGroup threadgroup = new ThreadGroup("searchEngines");

    /**
     * Handles an HTTP GET request: runs the search named by the
     * {@code query} request parameter and writes the formatted results.
     *
     * <p>The response is written through a PrintStream that is local to
     * this request. {@code System.out} and {@code System.err} are left
     * untouched: they are JVM-wide, so redirecting them to one client's
     * response (and then closing it) would affect every other request and
     * the server's own logging. Diagnostics go to the server's
     * {@code System.err}.
     *
     * @param req the client request; its {@code query} parameter is the
     *            search string
     * @param res the response the HTML page is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException if the response output stream cannot be obtained
     */
    public synchronized void doGet (HttpServletRequest req,
                                    HttpServletResponse res)
                                    throws ServletException, IOException {
      // Set the content type before any output is produced.
      res.setContentType("text/html");
      PrintStream page = new PrintStream(res.getOutputStream());

      try {
        query = req.getParameter("query");
        if(query == null || query.length() == 0) {
          page.println("<H1 ALIGN=CENTER>Please input a "+
                       "search string !</H1>\n");
          page.println("</BODY></HTML>\n");
          return;
        }

        // Run every configured search engine and wait for the results.
        if(!initialize()) {
          page.println("<H1 ALIGN=CENTER>Search is currently "+
                       "unavailable.</H1>");
          return;
        }

        writeResultsPage(page);
      }
      finally {
        page.flush();
        page.close();
      }
    }

    /**
     * Copies the template file to the client, replacing the
     * {@code <<results>>} marker with the merged search results.
     */
    private void writeResultsPage(PrintStream page) {
      BufferedReader template = null;
      try {
        template =
          new BufferedReader(new FileReader(System.getProperty("user.dir")+
                                            "/searchservlet.pat"));
        int cnt = 1;   // running number shown in front of each result
        String inline;
        while((inline=template.readLine()) != null) {
          int index = inline.toLowerCase().indexOf("<<results>>");
          if(index < 0) {
            page.println(inline);
            continue;
          }
          // Text before the marker, the results, then the rest of the line
          // ("<<results>>" is 11 characters long).
          page.println(inline.substring(0,index));
          cnt = writeInterleavedResults(page,cnt);
          page.println(inline.substring(index+11));
        }
      }
      catch (Exception e) {
        System.err.println("SearchManager (doGet): "+e);
      }
      finally {
        if(template != null) {
          try { template.close(); }
          catch (IOException ignored) { /* nothing useful to do on close */ }
        }
      }
    }

    /**
     * Writes the results round-robin: one result from each engine per pass,
     * so the most pertinent hits of every engine appear near the top.
     * Results are consumed (removed) from the engines as they are written.
     *
     * @param page the stream to write to
     * @param cnt  the number to give the next result
     * @return the number to give the next result after this call
     */
    private int writeInterleavedResults(PrintStream page, int cnt) {
      boolean wroteSomething = true;
      // Keep making passes until a full pass finds every engine exhausted.
      while(wroteSomething) {
        wroteSomething = false;
        for(int i=0;i<searchEngines.size();i++) {
          SearchEngine engine = (SearchEngine)searchEngines.elementAt(i);
          if(engine.getResultsCount() == 0)
            continue;   // this engine's results have been exhausted

          String link;
          try {
            link = engine.getResultItem(0);
            engine.removeResultItem(0);
          }
          catch (ArrayIndexOutOfBoundsException e) {
            continue;   // treated the same as an exhausted engine
          }

          page.println(String.valueOf(cnt++)+". "+
                       absolutize(link,engine.getHost())+"<BR>");
          wroteSomething = true;
        }
      }
      return cnt;
    }

    /**
     * Turns a relative {@code href} inside an anchor into an absolute one
     * by inserting the search engine's host in front of it.
     *
     * <p>Anchors that already contain {@code http://} or {@code https://},
     * and anchors without any {@code href=} attribute, are returned
     * unchanged. Both quoted ({@code href="x"}) and unquoted
     * ({@code href=x}) values are handled; a "/" is inserted between host
     * and path when the path does not start with one.
     */
    private static String absolutize(String link, String host) {
      String lower = link.toLowerCase();
      if(lower.indexOf("http://") > -1 || lower.indexOf("https://") > -1)
        return link;

      int idx = lower.indexOf("href=");
      if(idx < 0)
        return link;

      // "href=" is 5 characters; step over an opening quote if present.
      int valueStart = idx+5;
      if(valueStart < link.length() && link.charAt(valueStart) == '"')
        valueStart++;

      boolean rooted = valueStart < link.length() &&
                       link.charAt(valueStart) == '/';
      return link.substring(0,valueStart)+host+(rooted ? "" : "/")+
             link.substring(valueStart);
    }

    /**
     * Creates and starts one {@code SearchEngine} thread per URL listed in
     * {@code searchurls.conf} (read from the {@code user.dir} directory)
     * and waits for all of them to finish.
     *
     * <p>The current query is URL-encoded and appended to each configured
     * URL, so every URL in the file must end with the engine's query
     * parameter followed by an equal sign, e.g.
     * {@code http://www.search.com/cgi-bin/search?query=}. Blank lines in
     * the file are skipped.
     *
     * @return {@code true} when every search thread ran to completion;
     *         {@code false} if the configuration could not be read or the
     *         wait was interrupted (the cause is printed to
     *         {@code System.err})
     */
    public boolean initialize() {
      BufferedReader in = null;

      try {
        // Locate the configuration file in the servlet home directory.
        String servlet_dir = System.getProperty("user.dir").replace('\\','/');
        in = new BufferedReader(new FileReader(servlet_dir+
               (servlet_dir.endsWith("/")?"searchurls.conf":
                                          "/searchurls.conf")));

        // The query comes from the client; encode it so that spaces and
        // reserved characters cannot break or alter the engine URL.
        String encodedQuery =
          (query == null) ? "" : java.net.URLEncoder.encode(query,"UTF-8");

        searchEngines.removeAllElements();
        Vector workers = new Vector();   // the threads started below

        String inline;
        while((inline=in.readLine()) != null) {
          String url = inline.trim();
          if(url.length() == 0)
            continue;

          SearchEngine se = new SearchEngine(url+encodedQuery);
          se.addIgnoreTag("<img");
          se.addIgnoreTag("<IMG");
          se.addIgnoreTag(">_>");
          searchEngines.addElement(se);

          Thread worker = new Thread(threadgroup,se);
          workers.addElement(worker);
          worker.start();
        }

        // Wait for exactly the threads started above.
        for(int i=0;i<workers.size();i++)
          ((Thread)workers.elementAt(i)).join();
      }
      catch (InterruptedException e) {
        // Preserve the interrupt for whoever is running this thread.
        Thread.currentThread().interrupt();
        System.err.println("initialize: "+e); System.err.flush();
        return false;
      }
      catch (Exception e) {
        System.err.println("initialize: "+e); System.err.flush();
        return false;
      }
      finally {
        if(in != null) {
          try { in.close(); }
          catch (IOException ignored) { /* nothing useful to do on close */ }
        }
      }
      return true;
    }
}
  

Listing 2: HTMLAnchorParser class.
 
import java.io.*; 
import java.lang.*; 
import java.util.*; 

/**
 * Scans an HTML byte stream and collects its anchor elements.
 *
 * <p>Every anchor is returned as one String that starts with
 * {@code "<a "}, contains the original attributes and body, and ends with
 * {@code "</A>"}. Bytes are interpreted one-to-one as characters
 * (ISO-8859-1). Anchors that contain any of the registered ignore tags are
 * left out of the result.
 */
public class HTMLAnchorParser {
  /** The stream being parsed, or null when none has been set yet. */
  private DataInputStream in=null;

  /** Substrings that cause an anchor to be dropped from the results. */
  private Vector ignoreTags=new Vector();

  /** Creates a parser without an input stream. */
  public HTMLAnchorParser() { }

  /**
   * Creates a parser that reads from the given stream.
   *
   * @param in the HTML to parse
   */
  public HTMLAnchorParser(InputStream in) {
    setInputStream(in);
  }

  /**
   * Parses the given stream and returns the anchors found in it.
   *
   * @param in the HTML to parse
   * @return a Vector of Strings, one per anchor; never null
   * @throws IOException if reading the stream fails
   */
  public Vector getAnchorTags(InputStream in) throws IOException {
    setInputStream(in);
    return getAnchorTags();
  }

  /**
   * Sets the stream to be parsed. The stream is buffered here because the
   * parser reads it one byte at a time.
   */
  private void setInputStream(InputStream in) {
    this.in = (in == null)
        ? null
        : new DataInputStream(new BufferedInputStream(in));
  }

  /**
   * Adds a tag to the ignore list. An anchor whose text contains any entry
   * of this list (case-sensitive substring match) is not included in the
   * results.
   *
   * @param tag the substring to look for, e.g. {@code "<img"}
   */
  public void addIgnoreTag(String tag) { ignoreTags.addElement(tag); }

  /** Returns true when the anchor contains one of the ignore tags. */
  private boolean ignoreAnchor(String anchor) {
    for(int i=0;i<ignoreTags.size();i++)
      if(anchor.indexOf((String)ignoreTags.elementAt(i)) > -1)
        return true;
    return false;
  }

  /**
   * Reads the current input stream to its end and returns the anchors
   * found in it.
   *
   * <p>An anchor starts at {@code <a} or {@code <A} followed by a
   * whitespace character and ends at the next {@code </a} or {@code </A}.
   * An anchor that is still open when the stream ends is discarded.
   *
   * @return a Vector of Strings, one per anchor; empty (never null) when
   *         no stream has been set or no anchor was found
   * @throws IOException if reading the stream fails
   */
  public Vector getAnchorTags() throws IOException {
    Vector results = new Vector();
    if(in == null)
      return results;

    StringBuffer link = new StringBuffer();
    boolean recording = false;   // true while inside an anchor

    try {
      while(true) {
        // Yield the CPU to other threads.
        Thread.yield();

        // Read bytes as unsigned so that values >= 0x80 map to the
        // characters U+0080..U+00FF instead of negative numbers.
        int c = in.readUnsignedByte();

        if(c != '<') {
          if(recording)
            link.append((char)c);
          continue;
        }

        if(recording) {
          // Inside an anchor: look ahead two bytes for the closing "</a".
          int c1 = in.readUnsignedByte();
          int c2 = in.readUnsignedByte();
          if(c1 == '/' && (c2 == 'a' || c2 == 'A')) {
            link.append("</A>");
            recording = false;
            String anchor = link.toString();
            if(!ignoreAnchor(anchor))
              results.addElement(anchor);
            link = new StringBuffer();
          }
          else {
            // Some other tag inside the anchor body: keep what was read.
            link.append('<');
            link.append((char)c1);
            link.append((char)c2);
          }
        }
        else {
          // Outside an anchor: skip spaces after '<', then check whether
          // the tag name is "a" followed by whitespace.
          int c1;
          do {
            c1 = in.readUnsignedByte();
          } while(c1 == ' ');
          int c2 = in.readUnsignedByte();
          if((c1 == 'a' || c1 == 'A') && Character.isWhitespace((char)c2)) {
            link.append("<a ");
            recording = true;
          }
        }
      }
    }
    catch (EOFException endOfStream) {
      // Expected: readUnsignedByte signals the end of the document this way.
    }
    return results;
  }
}

Listing 3: SearchEngine class.
 
import java.net.*; 
import java.io.*; 
import java.util.*; 
import java.lang.Runnable; 
import java.util.Vector; 

// THIS CLASS OPENS A CONNECTION TO A SPECIFIED URL AND  
// READS THE CONTENTS PARSING OUT THE ANCHOR TAGS,  
// STORING THEM IN A VECTOR  
/**
 * Runs one search: opens a connection to a search URL, reads the returned
 * page and stores the anchor tags found in it.
 *
 * <p>The class is a {@link Runnable} so that several searches can run in
 * parallel threads; the results are available once {@link #run()} (or
 * {@link #getContent}) has finished.
 */
public class SearchEngine extends HTMLAnchorParser
                                        implements Runnable {
    private String query;
    private Vector results = new Vector();
    private String serverURL;

    /**
     * Creates a search for the given URL.
     *
     * @param serverURL the complete URL to fetch, including the query
     */
    public SearchEngine(String serverURL) {
      this.serverURL = serverURL;
    }

    /**
     * Fetches the search URL and collects its anchors. Called when the
     * thread running this object is started. Failures are reported on
     * {@code System.err}; the result list is then left as it was.
     */
    public void run() {
      try {
        URLConnection urlcon =
                       (new URL(serverURL)).openConnection();
        if(urlcon == null) {
           System.err.println("SearchEngine (run): "+
                              "Error opening URL connection.");
           System.err.flush();
           return;
        }

        urlcon.connect();
        getContent(urlcon);
      }
      catch (Exception e) {
        System.err.println("SearchEngine(run): "+e);
        System.err.flush(); }
    }

    /**
     * Reads the input stream of the given connection, stores the anchors
     * found in it as the new results, and closes the stream. Failures are
     * reported on {@code System.err}.
     *
     * @param urlc the connection to read from
     */
    public void getContent(URLConnection urlc) {
      InputStream stream = null;
      try {
        stream = urlc.getInputStream();
        Vector parsed = getAnchorTags(stream);
        // Never leave results null; the accessors below rely on it.
        results = (parsed != null) ? parsed : new Vector();
      }
      catch (Exception e) {
        System.err.println("SearchEngine (getContent): "+e);
        System.err.flush(); }
      finally {
        if(stream != null) {
          try { stream.close(); }
          catch (IOException ignored) { /* nothing useful to do on close */ }
        }
      }
    }

    /**
     * Prints every result on its own line to {@code System.out}. The
     * output can be redirected using {@code System.setOut}.
     */
    public synchronized void outputResults() {
       try {
         for(Enumeration e=results.elements();e.hasMoreElements();) {
            System.out.println((String)e.nextElement());
            System.out.flush();
         }
       } catch (Exception e) {
           System.err.println("SearchEngine (outputResults): "+e);
           System.err.flush(); }
    }

    /** Sets the query criteria for a search. */
    public void setQuery(String query) {
      this.query = query;
    }

    /** Returns the query criteria set with {@link #setQuery}. */
    public String getQuery() {
      return query;
    }

    /**
     * Returns the results of the search. The list is empty until
     * {@link #getContent} has been called.
     */
    public Vector getResults() {
      return results;
    }

    /**
     * Returns the scheme-and-host portion of the search URL, i.e.
     * everything before the first "/" or "?" that follows the host name
     * (for example {@code http://www.search.com}). When the URL has
     * neither, the whole URL is returned.
     */
    public String getHost() {
      int schemeEnd = serverURL.indexOf("//");
      int hostStart = (schemeEnd < 0) ? 0 : schemeEnd+2;

      // The host ends at whichever of "/" or "?" comes first.
      int slash = serverURL.indexOf('/',hostStart);
      int question = serverURL.indexOf('?',hostStart);
      int hostEnd = slash;
      if(hostEnd < 0 || (question > -1 && question < hostEnd))
        hostEnd = question;

      return (hostEnd < 0) ? serverURL : serverURL.substring(0,hostEnd);
    }

    /** Sets the URL used to execute the search. */
    public void setServerURL(String serverURL) {
      this.serverURL = serverURL;
    }

    /** Returns the URL used to execute the search. */
    public String getServerURL() {
      return serverURL;
    }

    /** Returns the number of search result items currently held. */
    public int getResultsCount() { return results.size(); }

    /**
     * Returns the search result at the given position.
     *
     * @param index zero-based position of the result
     * @throws ArrayIndexOutOfBoundsException if there is no such result
     */
    public String getResultItem(int index) throws ArrayIndexOutOfBoundsException {
      return (String)results.elementAt(index);
    }

    /**
     * Removes the search result at the given position.
     *
     * @param index zero-based position of the result to remove
     * @throws ArrayIndexOutOfBoundsException if there is no such result
     */
    public void removeResultItem(int index) throws ArrayIndexOutOfBoundsException {
      results.removeElementAt(index);
    }
}
  
      
 

All Rights Reserved
Copyright ©  2004 SYS-CON Media, Inc.
  E-mail: [email protected]

Java and Java-based marks are trademarks or registered trademarks of Sun Microsystems, Inc. in the United States and other countries. SYS-CON Publications, Inc. is independent of Sun Microsystems, Inc.