View Javadoc

1   /* Heritrix
2    *
3    * $Id: Heritrix.java 6081 2008-12-09 00:58:14Z gojomo $
4    *
5    * Created on May 15, 2003
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler;
26  
27  import java.io.File;
28  import java.io.FileInputStream;
29  import java.io.FileNotFoundException;
30  import java.io.FileOutputStream;
31  import java.io.IOException;
32  import java.io.InputStream;
33  import java.io.PrintStream;
34  import java.io.PrintWriter;
35  import java.net.HttpURLConnection;
36  import java.net.InetAddress;
37  import java.net.URL;
38  import java.net.URLConnection;
39  import java.net.UnknownHostException;
40  import java.util.ArrayList;
41  import java.util.Arrays;
42  import java.util.Collection;
43  import java.util.Collections;
44  import java.util.Enumeration;
45  import java.util.Hashtable;
46  import java.util.Iterator;
47  import java.util.List;
48  import java.util.Map;
49  import java.util.Properties;
50  import java.util.StringTokenizer;
51  import java.util.TimeZone;
52  import java.util.Vector;
53  import java.util.logging.Level;
54  import java.util.logging.LogManager;
55  import java.util.logging.Logger;
56  
57  import javax.management.Attribute;
58  import javax.management.AttributeList;
59  import javax.management.AttributeNotFoundException;
60  import javax.management.DynamicMBean;
61  import javax.management.InstanceAlreadyExistsException;
62  import javax.management.InstanceNotFoundException;
63  import javax.management.InvalidAttributeValueException;
64  import javax.management.MBeanInfo;
65  import javax.management.MBeanNotificationInfo;
66  import javax.management.MBeanOperationInfo;
67  import javax.management.MBeanRegistration;
68  import javax.management.MBeanRegistrationException;
69  import javax.management.MBeanServer;
70  import javax.management.MBeanServerFactory;
71  import javax.management.MalformedObjectNameException;
72  import javax.management.NotCompliantMBeanException;
73  import javax.management.ObjectName;
74  import javax.management.ReflectionException;
75  import javax.management.RuntimeOperationsException;
76  import javax.management.openmbean.CompositeData;
77  import javax.management.openmbean.CompositeDataSupport;
78  import javax.management.openmbean.CompositeType;
79  import javax.management.openmbean.OpenDataException;
80  import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
81  import javax.management.openmbean.OpenMBeanConstructorInfoSupport;
82  import javax.management.openmbean.OpenMBeanInfoSupport;
83  import javax.management.openmbean.OpenMBeanOperationInfoSupport;
84  import javax.management.openmbean.OpenMBeanParameterInfo;
85  import javax.management.openmbean.OpenMBeanParameterInfoSupport;
86  import javax.management.openmbean.OpenType;
87  import javax.management.openmbean.SimpleType;
88  import javax.management.openmbean.TabularData;
89  import javax.management.openmbean.TabularDataSupport;
90  import javax.management.openmbean.TabularType;
91  import javax.naming.CompoundName;
92  import javax.naming.Context;
93  import javax.naming.NameNotFoundException;
94  import javax.naming.NamingException;
95  import javax.naming.NoInitialContextException;
96  
97  import org.apache.commons.cli.Option;
98  import org.archive.crawler.admin.CrawlJob;
99  import org.archive.crawler.admin.CrawlJobErrorHandler;
100 import org.archive.crawler.admin.CrawlJobHandler;
101 import org.archive.crawler.datamodel.CredentialStore;
102 import org.archive.crawler.datamodel.credential.Credential;
103 import org.archive.crawler.event.CrawlStatusListener;
104 import org.archive.crawler.framework.AlertManager;
105 import org.archive.crawler.framework.CrawlController;
106 import org.archive.crawler.framework.exceptions.FatalConfigurationException;
107 import org.archive.crawler.framework.exceptions.InitializationException;
108 import org.archive.crawler.selftest.SelfTestCrawlJobHandler;
109 import org.archive.crawler.settings.XMLSettingsHandler;
110 import org.archive.io.SinkHandler;
111 import org.archive.io.SinkHandlerLogRecord;
112 import org.archive.net.UURI;
113 import org.archive.util.FileUtils;
114 import org.archive.util.IoUtils;
115 import org.archive.util.JmxUtils;
116 import org.archive.util.JndiUtils;
117 import org.archive.util.PropertyUtils;
118 import org.archive.util.TextUtils;
119 
120 import sun.net.www.protocol.file.FileURLConnection;
121 
122 
123 /***
124  * Main class for Heritrix crawler.
125  *
126  * Heritrix is usually launched by a shell script that backgrounds heritrix
127  * that redirects all stdout and stderr emitted by heritrix to a log file.  So
128  * that startup messages emitted subsequent to the redirection of stdout and
129  * stderr show on the console, this class prints usage or startup output
130  * such as where the web UI can be found, etc., to a STARTLOG that the shell
131  * script is waiting on.  As soon as the shell script sees output in this file,
132  * it prints its content and breaks out of its wait.
133  * See ${HERITRIX_HOME}/bin/heritrix.
134  * 
135  * <p>Heritrix can also be embedded or launched by webapp initialization or
136  * by JMX bootstrapping.  So far I count 4 methods of instantiation:
137  * <ol>
138  * <li>From this class's main -- the method usually used;</li>
139  * <li>From the Heritrix UI (The local-instances.jsp) page;</li>
140  * <li>A creation by a JMX agent at the behest of a remote JMX client; and</li>
141  * <li>A container such as tomcat or jboss.</li>
142  * </ol>
143  *
144  * @author gojomo
145  * @author Kristinn Sigurdsson
146  * @author Stack
147  */
148 public class Heritrix implements DynamicMBean, MBeanRegistration {
149     /***
150      * Heritrix logging instance.
151      */
152     private static final Logger logger =
153         Logger.getLogger(Heritrix.class.getName());
154     
155     private static final File TMPDIR =
156         new File(System.getProperty("java.io.tmpdir", "/tmp"));
157 
158     /***
159      * Name of the heritrix properties file.
160      */
161     private static final String PROPERTIES = "heritrix.properties";
162 
163     /***
164      * Name of the key to use specifying alternate heritrix properties on
165      * command line.
166      */
167     private static final String PROPERTIES_KEY = PROPERTIES;
168     
169     /***
170      * Prefix used on our properties we'll add to the System.properties list.
171      */
172     private static final String HERITRIX_PROPERTIES_PREFIX = "heritrix.";
173 
174     /***
175      * Prefix used on other properties we'll add to the System.properties 
176      * list (after stripping this prefix). 
177      */
178     private static final String SYSTEM_PREFIX = "system.";
179 
180     /***
181      * Instance of web server if one was started.
182      */
183     private static SimpleHttpServer httpServer = null;
184 
185     /***
186      * CrawlJob handler. Manages multiple crawl jobs at runtime.
187      */
188     private CrawlJobHandler jobHandler = null;
189 
190     /***
191      * Heritrix start log file.
192      *
193      * This file contains standard out produced by this main class for startup
194      * only.  Used by heritrix shell script.  Name here MUST match that in the
195      * <code>bin/heritrix</code> shell script.  This is a DEPENDENCY the shell
196      * wrapper has on this here java heritrix.
197      */
198     private static final String STARTLOG = "heritrix_dmesg.log";
199 
200     /***
201      * Default encoding.
202      * 
203      * Used for content when fetching if none specified.
204      */
205 	public static final String DEFAULT_ENCODING = "ISO-8859-1";
206 
207     /***
208      * Heritrix stderr/stdout log file.
209      *
210      * This file should have nothing in it except messages over which we have
211      * no control (JVM stacktrace, 3rd-party lib emissions).  The wrapper
212      * startup script directs stderr/stdout here. This is an INTERDEPENDENCY
213      * this program has with the wrapper shell script.  Shell can actually
214      * pass us an alternate to use for this file.
215      */
216     private static String DEFAULT_HERITRIX_OUT = "heritrix_out.log";
217 
218     /***
219      * Where to write this classes startup output.
220      * 
221      * This out should only be used if Heritrix is being run from the
222      * command-line.
223      */
224     private static PrintWriter out = null;
225 
226     /***
227      * The org.archive package
228      */
229     private static final String ARCHIVE_PACKAGE = "org.archive.";
230 
231     /***
232      * The crawler package.
233      */
234 	private static final String CRAWLER_PACKAGE = Heritrix.class.getName().
235         substring(0, Heritrix.class.getName().lastIndexOf('.'));
236     
237     /***
238      * The root context for a webapp.
239      */
240     private static final String ROOT_CONTEXT = "/";
241 
242     /***
243      * Set to true if application is started from command line.
244      */
245     private static boolean commandLine = false;
246     
247     /***
248      * True if container initialization has been run.
249      */
250     private static boolean containerInitialized = false;
251     
252     /***
253      * True if properties have been loaded.
254      */
255     private static boolean propertiesLoaded = false;
256     
257     private static final String JAR_SUFFIX = ".jar";
258     
259     private AlertManager alertManager;
260 
261     /***
262      * The context of the GUI webapp.  Default is root.
263      */
264     private static String adminContext = ROOT_CONTEXT;
265     
266     /***
267      * True if we're to put up a GUI.
268      * Cmdline processing can override.
269      */
270     private static boolean gui =
271         !PropertyUtils.getBooleanProperty("heritrix.cmdline.nowui");
272     
273     /***
274      * Port to put the GUI up on.
275      * Cmdline processing can override.
276      */
277     private static int guiPort = SimpleHttpServer.DEFAULT_PORT;
278 
279     
280     /***
281      * A collection containing only localhost.  Used as default value
282      * for guiHosts, and passed to SimpleHttpServer when doing selftest.
283      */
284     final private static Collection<String> LOCALHOST_ONLY =
285      Collections.unmodifiableList(Arrays.asList(new String[] { "127.0.0.1" }));
286 
287     
288     /***
289      * Hosts to bind the GUI webserver to.
290      * By default, only contans localhost.
291      * Set to an empty collection to indicate that all available network
292      * interfaces should be used for the webserver.
293      */
294     private static Collection<String> guiHosts = LOCALHOST_ONLY;
295     
296     
297     /***
298      * Web UI server, realm, context name.
299      */
300     private static String ADMIN = "admin";
301     
302     // OpenMBean support.
303     /***
304      * The MBean server we're registered with (May be null).
305      */
306     private MBeanServer mbeanServer = null;
307     
308     /***
309      * MBean name we were registered as.
310      */
311     private ObjectName mbeanName = null;
312     
313     /***
314      * Keep reference to all instances of Heritrix.
315      * Used by the UI to figure which of the local Heritrice it should
316      * be going against and to figure what to shutdown on the way out (If
317      * there was always a JMX Agent, we wouldn't need to keep this list.  We
318      * could always ask the JMX Agent for all instances. UPDATE: True we could
319      * always ask the JMX Agent but we might keep around this local reference
320      * because it will allow faster, less awkward -- think of marshalling the args
321      * for JMX invoke operation -- access to local Heritrix instances.  A new
322      * usage for this instances Map is in CrawlJob#preRegister to find the hosting
323      * Heritrix instance).
324      */
325     private static Map<String,Heritrix> instances
326      = new Hashtable<String,Heritrix>();
327     
328     private OpenMBeanInfoSupport openMBeanInfo;
329     private final static String STATUS_ATTR = "Status";
330     private final static String VERSION_ATTR = "Version";
331     private final static String ISRUNNING_ATTR = "IsRunning";
332     private final static String ISCRAWLING_ATTR = "IsCrawling";
333     private final static String ALERTCOUNT_ATTR = "AlertCount";
334     private final static String NEWALERTCOUNT_ATTR = "NewAlertCount";
335     private final static String CURRENTJOB_ATTR = "CurrentJob";
336     private final static List ATTRIBUTE_LIST;
337     static {
338         ATTRIBUTE_LIST = Arrays.asList(new String [] {STATUS_ATTR,
339             VERSION_ATTR, ISRUNNING_ATTR, ISCRAWLING_ATTR,
340             ALERTCOUNT_ATTR, NEWALERTCOUNT_ATTR, CURRENTJOB_ATTR});
341     }
342     
343     private final static String START_OPER = "start";
344     private final static String STOP_OPER = "stop";
345     private final static String DESTROY_OPER = "destroy";
346     private final static String INTERRUPT_OPER = "interrupt";
347     private final static String START_CRAWLING_OPER = "startCrawling";
348     private final static String STOP_CRAWLING_OPER = "stopCrawling";
349     private final static String ADD_CRAWL_JOB_OPER = "addJob";
350     private final static String TERMINATE_CRAWL_JOB_OPER =
351         "terminateCurrentJob";
352     private final static String DELETE_CRAWL_JOB_OPER = "deleteJob";
353     private final static String ALERT_OPER = "alert";
354     private final static String ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon";
355     private final static String PENDING_JOBS_OPER = "pendingJobs";
356     private final static String COMPLETED_JOBS_OPER = "completedJobs";
357     private final static String CRAWLEND_REPORT_OPER = "crawlendReport";
358     private final static String SHUTDOWN_OPER = "shutdown";
359     private final static String LOG_OPER = "log";
360     private final static String REBIND_JNDI_OPER = "rebindJNDI";
361     private final static List OPERATION_LIST;
362     static {
363         OPERATION_LIST = Arrays.asList(new String [] {START_OPER, STOP_OPER,
364             INTERRUPT_OPER, START_CRAWLING_OPER, STOP_CRAWLING_OPER,
365             ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER,
366             DELETE_CRAWL_JOB_OPER, ALERT_OPER, PENDING_JOBS_OPER,
367             COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER,
368             LOG_OPER, DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER,
369             REBIND_JNDI_OPER});
370     }
371     private CompositeType jobCompositeType = null;
372     private TabularType jobsTabularType = null;
373     private static final String [] JOB_KEYS =
374         new String [] {"uid", "name", "status"};
375 
376     private static String adminUsername;
377 
378     private static String adminPassword;
379     
380     /***
381      * Constructor.
382      * Does not register the created instance with JMX.  Assumed this
383      * constructor is used by such as JMX agent creating an instance of
384      * Heritrix at the commmand of a remote client (In this case Heritrix will
385      * be registered by the invoking agent).
386      * @throws IOException
387      */
388     public Heritrix() throws IOException {
389         this(null, false);
390     }
391     
392     public Heritrix(final boolean jmxregister) throws IOException {
393         this(null, jmxregister);
394     }
395     
396     /***
397      * Constructor.
398      * @param name If null, we bring up the default Heritrix instance.
399      * @param jmxregister True if we are to register this instance with JMX
400      * agent.
401      * @throws IOException
402      */
403     public Heritrix(final String name, final boolean jmxregister)
404     throws IOException {
405         this(name, jmxregister, new CrawlJobHandler(getJobsdir()));
406     }
407     
408     /***
409      * Constructor.
410      * @param name If null, we bring up the default Heritrix instance.
411      * @param jmxregister True if we are to register this instance with JMX
412      * agent.
413      * @param cjh CrawlJobHandler to use.
414      * @throws IOException
415      */
416     public Heritrix(final String name, final boolean jmxregister,
417             final CrawlJobHandler cjh)
418     throws IOException {
419         super();
420         containerInitialization();
421         this.jobHandler = cjh;
422         this.openMBeanInfo = buildMBeanInfo();
423         // Set up the alerting system.  SinkHandler is also a global so will
424         // catch alerts for all running Heritrix instances.  Will need to
425         // address (Add name of instance that threw the alert to SinkRecord?).
426         final SinkHandler sinkHandler = SinkHandler.getInstance();
427         if (sinkHandler == null) {
428             throw new NullPointerException("SinkHandler not found.");
429         }
430         // Adapt the alerting system to use SinkHandler.
431         this.alertManager = new AlertManager() {
432             public void add(SinkHandlerLogRecord record) {
433                 sinkHandler.publish(record);
434             }
435 
436             public Vector getAll() {
437                 return sinkHandler.getAll();
438             }
439 
440             public Vector getNewAll() {
441                 return sinkHandler.getAllUnread();
442             }
443 
444             public SinkHandlerLogRecord get(String alertID) {
445                 return sinkHandler.get(Long.parseLong(alertID));
446             }
447             
448             public int getCount() {
449                 return sinkHandler.getCount();
450             }
451 
452             public int getNewCount() {
453                 return sinkHandler.getUnreadCount();
454             }
455 
456             public void remove(String alertID) {
457                 sinkHandler.remove(Long.parseLong(alertID));
458             }
459 
460             public void read(String alertID) {
461                 sinkHandler.read(Long.parseLong(alertID));
462             }
463         };
464         
465         try {
466             Heritrix.registerHeritrix(this, name, jmxregister);
467         } catch (InstanceAlreadyExistsException e) {
468             throw new RuntimeException(e);
469         } catch (MBeanRegistrationException e) {
470             throw new RuntimeException(e);
471         } catch (NotCompliantMBeanException e) {
472             throw new RuntimeException(e);
473         } catch (MalformedObjectNameException e) {
474             throw new RuntimeException(e);
475         }
476     }
477     
478     /***
479      * Run setup tasks for this 'container'. Idempotent.
480      * 
481      * @throws IOException
482      */
483     protected static void containerInitialization() throws IOException {
484         if (Heritrix.containerInitialized) {
485             return;
486         }
487         Heritrix.containerInitialized = true;
488         // Load up the properties.  This invocation adds heritrix properties
489         // to system properties so all available via System.getProperty.
490         // Note, loadProperties and patchLogging have global effects.  May be an
491         // issue if we're running inside a container such as tomcat or jboss.
492         Heritrix.loadProperties();
493         Heritrix.patchLogging();
494         Heritrix.configureTrustStore();
495         // Will run on SIGTERM but not on SIGKILL, unfortunately.
496         // Otherwise, ensures we cleanup after ourselves (Deregister from
497         // JMX and JNDI).
498         Runtime.getRuntime().addShutdownHook(
499             Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook"));
500         // Register this heritrix 'container' though we may be inside another
501         // tomcat or jboss container.
502         try {
503             registerContainerJndi();
504         } catch (Exception e) {
505             logger.log(Level.WARNING, "Failed jndi container registration.", e);
506         }
507     }
508     
509     /***
510      * Do inverse of construction. Used by anyone who does a 'new Heritrix' when
511      * they want to cleanup the instance.
512      * Of note, there may be Heritrix threads still hanging around after the
513      * call to destroy completes.  They'll eventually go down after they've
514      * finished their cleanup routines.  In particular, if you are watching
515      * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister
516      * ahead of the CrawlJob JMX bean that its hosting.
517      */
518     public void destroy() {
519         stop();
520         try {
521             Heritrix.unregisterHeritrix(this);
522         } catch (InstanceNotFoundException e) {
523             e.printStackTrace();
524         } catch (MBeanRegistrationException e) {
525             e.printStackTrace();
526         } catch (NullPointerException e) {
527             e.printStackTrace();
528         }
529         this.jobHandler = null;
530         this.openMBeanInfo = null;
531     }
532     
533     /***
534      * Launch program.
535      * Optionally will launch a web server to host UI.  Will also register
536      * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM
537      * Agent).
538      * 
539      * @param args Command line arguments.
540      * @throws Exception
541      */
542     public static void main(String[] args)
543     throws Exception {
544         Heritrix.commandLine = true;
545         
546         // Set timezone here.  Would be problematic doing it if we're running
547         // inside in a container.
548         TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
549         
550         File startLog = new File(getHeritrixHome(), STARTLOG);
551         Heritrix.out = new PrintWriter(isDevelopment()? 
552             System.out: new PrintStream(new FileOutputStream(startLog)));
553         
554         try {
555             containerInitialization();
556             String status = doCmdLineArgs(args);
557             if (status != null) {
558                 Heritrix.out.println(status);
559             }
560         }
561 
562         catch(Exception e) {
563             // Show any exceptions in STARTLOG.
564             e.printStackTrace(Heritrix.out);
565             throw e;
566         }
567 
568         finally {
569             // If not development, close the file that signals the wrapper
570             // script that we've started.  Otherwise, just flush it; if in
571             // development, the output is probably a console.
572             if (!isDevelopment()) {
573                 if (Heritrix.out != null) {
574                     Heritrix.out.close();
575                 }
576                 System.out.println("Heritrix version: " +
577                         Heritrix.getVersion());
578             } else {
579                 if (Heritrix.out != null) {
580                     Heritrix.out.flush();
581                 }
582             }
583         }
584     }
585     
586     protected static String doCmdLineArgs(final String [] args)
587     throws Exception {
588         // Get defaults for commandline arguments from the properties file.
589         String tmpStr = PropertyUtils.
590             getPropertyOrNull("heritrix.context");
591         if (tmpStr != null)  {
592             Heritrix.adminContext = tmpStr;
593         }
594         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port");
595         if (tmpStr != null) {
596             Heritrix.guiPort = Integer.parseInt(tmpStr);
597         }
598         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin");
599         String adminLoginPassword = (tmpStr == null)? "": tmpStr;
600         String crawlOrderFile =
601             PropertyUtils.getPropertyOrNull("heritrix.cmdline.order");
602         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.run");
603         boolean runMode =
604             PropertyUtils.getBooleanProperty("heritrix.cmdline.run");
605         boolean selfTest = false;
606         String selfTestName = null;
607         CommandLineParser clp = new CommandLineParser(args, Heritrix.out,
608             Heritrix.getVersion());
609         List arguments = clp.getCommandLineArguments();
610         Option [] options = clp.getCommandLineOptions();
611 
612         // Check passed argument.  Only one argument, the ORDER_FILE is allowed.
613         // If one argument, make sure exists and xml suffix.
614         if (arguments.size() > 1) {
615             clp.usage(1);
616         } else if (arguments.size() == 1) {
617             crawlOrderFile = (String)arguments.get(0);
618             if (!(new File(crawlOrderFile).exists())) {
619                 clp.usage("ORDER.XML <" + crawlOrderFile +
620                     "> specified does not exist.", 1);
621             }
622             // Must end with '.xml'
623             if (crawlOrderFile.length() > 4 &&
624                     !crawlOrderFile.substring(crawlOrderFile.length() - 4).
625                         equalsIgnoreCase(".xml")) {
626                 clp.usage("ORDER.XML <" + crawlOrderFile +
627                     "> does not have required '.xml' suffix.", 1);
628             }
629         }
630 
631         // Now look at options passed.
632         for (int i = 0; i < options.length; i++) {
633             switch(options[i].getId()) {
634                 case 'h':
635                     clp.usage();
636                     break;
637 
638                 case 'a':
639                     adminLoginPassword = options[i].getValue();
640                     break;
641 
642                 case 'n':
643                     if (crawlOrderFile == null) {
644                         clp.usage("You must specify an ORDER_FILE with" +
645                             " '--nowui' option.", 1);
646                     }
647                     Heritrix.gui = false;
648                     break;
649                 
650                 case 'b':
651                     Heritrix.guiHosts = parseHosts(options[i].getValue());
652                     break;
653 
654                 case 'p':
655                     try {
656                         Heritrix.guiPort =
657                             Integer.parseInt(options[i].getValue());
658                     } catch (NumberFormatException e) {
659                         clp.usage("Failed parse of port number: " +
660                             options[i].getValue(), 1);
661                     }
662                     if (Heritrix.guiPort <= 0) {
663                         clp.usage("Nonsensical port number: " +
664                             options[i].getValue(), 1);
665                     }
666                     break;
667 
668                 case 'r':
669                     runMode = true;
670                     break;
671 
672                 case 's':
673                     selfTestName = options[i].getValue();
674                     selfTest = true;
675                     break;
676 
677                 default:
678                     assert false: options[i].getId();
679             }
680         }
681 
682         // Ok, we should now have everything to launch the program.
683         String status = null;
684         if (selfTest) {
685             // If more than just '--selftest' and '--port' passed, then
686             // there is confusion on what is being asked of us.  Print usage
687             // rather than proceed.
688             for (int i = 0; i < options.length; i++) {
689                 if (options[i].getId() != 'p' && options[i].getId() != 's') {
690                     clp.usage(1);
691                 }
692             }
693 
694             if (arguments.size() > 0) {
695                 // No arguments accepted by selftest.
696                 clp.usage(1);
697             }
698             status = selftest(selfTestName, Heritrix.guiPort);
699         } else {
700 			if (!Heritrix.gui) {
701 				if (options.length > 1) {
702 					// If more than just '--nowui' passed, then there is
703 					// confusion on what is being asked of us. Print usage
704 					// rather than proceed.
705 					clp.usage(1);
706 				}
707 				Heritrix h = new Heritrix(true);
708 				status = h.doOneCrawl(crawlOrderFile);
709 			} else {
710                 if (!isValidLoginPasswordString(adminLoginPassword)) {
711                     // exit printing usage info if no webui login:password given
712                     clp.usage("Invalid admin login:password value, or none "
713                             + "specified. ", 1);
714                 }
715 				status = startEmbeddedWebserver(
716                         Heritrix.guiHosts, Heritrix.guiPort,
717 						adminLoginPassword);
718 				Heritrix h = new Heritrix(true);
719 
720 				String tmp = h.launch(crawlOrderFile, runMode);
721 				if (tmp != null) {
722 					status += ('\n' + tmp);
723 				}
724 			}
725 		}
726         return status;
727     }
728     
729     /***
730 	 * @return The file we dump stdout and stderr into.
731 	 */
732     public static String getHeritrixOut() {
733         String tmp = System.getProperty("heritrix.out");
734         if (tmp == null || tmp.length() == 0) {
735             tmp = Heritrix.DEFAULT_HERITRIX_OUT;
736         }
737         return tmp;
738     }
739 
740     /***
741      * Exploit <code>-Dheritrix.home</code> if available to us.
742      * Is current working dir if no heritrix.home property supplied.
743      * @return Heritrix home directory.
744      * @throws IOException
745      */
746     protected static File getHeritrixHome()
747     throws IOException {
748         File heritrixHome = null;
749         String home = System.getProperty("heritrix.home");
750         if (home != null && home.length() > 0) {
751             heritrixHome = new File(home);
752             if (!heritrixHome.exists()) {
753                 throw new IOException("HERITRIX_HOME <" + home +
754                     "> does not exist.");
755             }
756         } else {
757             heritrixHome = new File(new File("").getAbsolutePath());
758         }
759         return heritrixHome;
760     }
761     
762     /***
763      * @return The directory into which we put jobs.  If the system property
764      * 'heritrix.jobsdir' is set, we will use its value in place of the default
765      * 'jobs' directory in the current working directory.
766      * @throws IOException
767      */
768     public static File getJobsdir() throws IOException {
769         Heritrix.loadProperties(); // if called in constructor
770         String jobsdirStr = System.getProperty("heritrix.jobsdir", "jobs");
771         File jobsdir = new File(jobsdirStr);
772         return (jobsdir.isAbsolute())?
773             jobsdir:
774             new File(getHeritrixHome(), jobsdirStr);
775     }
776     
777     /***
778      * Get and check for existence of expected subdir.
779      *
780      * If development flag set, then look for dir under src dir.
781      *
782      * @param subdirName Dir to look for.
783      * @return The extant subdir.  Otherwise null if we're running
784      * in a webapp context where there is no conf directory available.
785      * @throws IOException if unable to find expected subdir.
786      */
787     protected static File getSubDir(String subdirName)
788     throws IOException {
789         return getSubDir(subdirName, true);
790     }
791     
792     /***
793      * Get and optionally check for existence of subdir.
794      *
795      * If development flag set, then look for dir under src dir.
796      *
797      * @param subdirName Dir to look for.
798      * @param fail True if we are to fail if directory does not
799      * exist; false if we are to return false if the directory does not exist.
800      * @return The extant subdir.  Otherwise null if we're running
801      * in a webapp context where there is no subdir directory available.
802      * @throws IOException if unable to find expected subdir.
803      */
804     protected static File getSubDir(String subdirName, boolean fail)
805     throws IOException {
806         String path = isDevelopment()?
807             "src" + File.separator + subdirName:
808             subdirName;
809         File dir = new File(getHeritrixHome(), path);
810         if (!dir.exists()) {
811             if (fail) {
812                 throw new IOException("Cannot find subdir: " + subdirName);
813             }
814             dir = null;
815         }
816         return dir;
817     }
818     
819     /***
820      * Test string is valid login/password string.
821      *
822      * A valid login/password string has the login and password compounded
823      * w/ a ':' delimiter.
824      *
825      * @param str String to test.
826      * @return True if valid password/login string.
827      */
828     protected static boolean isValidLoginPasswordString(String str) {
829         boolean isValid = false;
830         StringTokenizer tokenizer = new StringTokenizer(str,  ":");
831         if (tokenizer.countTokens() == 2) {
832             String login = ((String)tokenizer.nextElement()).trim();
833             String password = ((String)tokenizer.nextElement()).trim();
834             if (login.length() > 0 && password.length() > 0) {
835                 isValid = true;
836             }
837         }
838         return isValid;
839     }
840 
841     protected static boolean isDevelopment() {
842         return System.getProperty("heritrix.development") != null;
843     }
844 
845     /***
846      * Load the heritrix.properties file.
847      * 
848      * Adds any property that starts with
849      * <code>HERITRIX_PROPERTIES_PREFIX</code>
850      * or <code>ARCHIVE_PACKAGE</code>
851      * into system properties (except logging '.level' directives).
852      * @return Loaded properties.
853      * @throws IOException
854      */
855     protected static Properties loadProperties()
856     throws IOException {
857         if (Heritrix.propertiesLoaded) {
858             return System.getProperties();
859         }
860         Heritrix.propertiesLoaded = true;
861             
862         Properties properties = new Properties();
863         properties.load(getPropertiesInputStream());
864         
865         // Any property that begins with ARCHIVE_PACKAGE, make it
866         // into a system property. While iterating, check to see if anything
867         // defined on command-line, and if so, it overrules whats in
868         // heritrix.properties.
869         for (Enumeration e = properties.keys(); e.hasMoreElements();) {
870             String key = ((String)e.nextElement()).trim();
871         	if (key.startsWith(ARCHIVE_PACKAGE) ||
872                     key.startsWith(HERITRIX_PROPERTIES_PREFIX)) {
873                 // Don't add the heritrix.properties entries that are
874                 // changing the logging level of particular classes.
875                 String value = properties.getProperty(key).trim();
876                 if (key.indexOf(".level") < 0) {
877                     copyToSystemProperty(key, value);
878                 }
879             }  else if (key.startsWith(SYSTEM_PREFIX)) {
880                 String value = properties.getProperty(key).trim();
881                 copyToSystemProperty(key.substring(SYSTEM_PREFIX.length()), value); 
882             }
883         }
884         return properties;
885     }
886 
887     /***
888      * Copy the given key-value into System properties, as long as there
889      * is no existing value. 
890      * @param key property key 
891      * @param value property value
892      */
893     protected static void copyToSystemProperty(String key, String value) {
894         if (System.getProperty(key) == null ||
895             System.getProperty(key).length() == 0) {
896             System.setProperty(key, value);
897         }
898     }
899 
900     protected static InputStream getPropertiesInputStream()
901     throws IOException {
902         File file = null;
903         // Look to see if properties have been passed on the cmd-line.
904         String alternateProperties = System.getProperty(PROPERTIES_KEY);
905         if (alternateProperties != null && alternateProperties.length() > 0) {
906             file = new File(alternateProperties);
907         }
908         // Get properties from conf directory if one available.
909         if ((file == null || !file.exists()) && getConfdir(false) != null) {
910             file = new File(getConfdir(), PROPERTIES);
911             if (!file.exists()) {
912                 // If no properties file in the conf dir, set file back to
913                 // null so we go looking for heritrix.properties on classpath.
914                 file = null;
915             }
916         }
917         // If not on the command-line, there is no conf dir. Then get the
918         // properties from the CLASSPATH (Classpath file separator is always
919         // '/', whatever the platform.
920         InputStream is = (file != null)?
921             new FileInputStream(file):
922             Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY);
923         if (is == null) {
924             throw new IOException("Failed to load properties file from" +
925                 " filesystem or from classpath.");
926         }
927         return is;
928     }
929 
930     /***
931      * If the user hasn't altered the default logging parameters, tighten them
932      * up somewhat: some of our libraries are way too verbose at the INFO or
933      * WARNING levels.
934      * 
935      * This might be a problem running inside in someone else's
936      * container.  Container's seem to prefer commons logging so we
937      * ain't messing them doing the below.
938      *
939      * @throws IOException
940      * @throws SecurityException
941      */
942     protected static void patchLogging()
943     throws SecurityException, IOException {
944         if (System.getProperty("java.util.logging.config.class") != null) {
945             return;
946         }
947 
948         if (System.getProperty("java.util.logging.config.file") != null) {
949             return;
950         }
951 
952         // No user-set logging properties established; use defaults
953         // from distribution-packaged 'heritrix.properties'.
954         LogManager.getLogManager().
955             readConfiguration(getPropertiesInputStream());
956     }
957 
958     /***
959      * Configure our trust store.
960      *
961      * If system property is defined, then use it for our truststore.  Otherwise
962      * use the heritrix truststore under conf directory if it exists.
963      * 
964      * <p>If we're not launched from the command-line, we will not be able
965      * to find our truststore.  The truststore is nor normally used so rare
966      * should this be a problem (In case where we don't use find our trust
967      * store, we'll use the 'default' -- either the JVMs or the containers).
968      */
969     protected static void configureTrustStore() {
970         // Below must be defined in jsse somewhere but can' find it.
971         final String TRUSTSTORE_KEY = "javax.net.ssl.trustStore";
972         String value = System.getProperty(TRUSTSTORE_KEY);
973         File confdir = null;
974         try {
975             confdir = getConfdir(false);
976         } catch (IOException e) {
977             logger.log(Level.WARNING, "Failed to get confdir.", e);
978         }
979         if ((value == null || value.length() <= 0) && confdir != null) {
980             // Use the heritrix store if it exists on disk.
981             File heritrixStore = new File(confdir, "heritrix.cacerts");
982             if(heritrixStore.exists()) {
983                 value = heritrixStore.getAbsolutePath();
984             }
985         }
986 
987         if (value != null && value.length() > 0) {
988             System.setProperty(TRUSTSTORE_KEY, value);
989         }
990     }
991 
992     /***
993      * Run the selftest
994      *
995      * @param oneSelfTestName Name of a test if we are to run one only rather
996      * than the default running all tests.
997      * @param port Port number to use for web UI.
998      *
999      * @exception Exception
1000      * @return Status of how selftest startup went.
1001      */
1002     protected static String selftest(final String oneSelfTestName,
1003             final int port)
1004         throws Exception {
1005         // Put up the webserver w/ the root and selftest webapps only.
1006         final String SELFTEST = "selftest";
1007         Heritrix.httpServer = new SimpleHttpServer(SELFTEST,
1008             Heritrix.adminContext, LOCALHOST_ONLY, port, true);
1009         // Set up digest auth for a section of the server so selftest can run
1010         // auth tests.  Looks like can only set one login realm going by the
1011         // web.xml dtd.  Otherwise, would be nice to selftest basic and digest.
1012         // Have login, password and role all be SELFTEST.  Must match what is
1013         // in the selftest order.xml file.
1014         Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext,
1015             SELFTEST, SELFTEST, SELFTEST);
1016         Heritrix.httpServer.startServer();
1017         // Get the order file from the CLASSPATH unless we're running in dev
1018         // environment.
1019         File selftestDir = (isDevelopment())?
1020             new File(getConfdir(), SELFTEST):
1021             new File(File.separator + SELFTEST);
1022         File crawlOrderFile = new File(selftestDir, "order.xml");
1023         // Create a job based off the selftest order file.  Then use this as
1024         // a template to pass jobHandler.newJob().  Doing this gets our
1025         // selftest output to show under the jobs directory.
1026         // Pass as a seed a pointer to the webserver we just put up.
1027         final String ROOTURI = "127.0.0.1:" + Integer.toString(port);
1028         String selfTestUrl = "http://" + ROOTURI + '/';
1029         if (oneSelfTestName != null && oneSelfTestName.length() > 0) {
1030             selfTestUrl += (oneSelfTestName + '/');
1031         }
1032         CrawlJobHandler cjh = new SelfTestCrawlJobHandler(getJobsdir(),
1033                 oneSelfTestName, selfTestUrl);
1034         Heritrix h = new Heritrix("Selftest", true, cjh);
1035         CrawlJob job = createCrawlJob(cjh, crawlOrderFile, "Template");
1036         job = h.getJobHandler().newJob(job, null, SELFTEST,
1037             "Integration self test", selfTestUrl, CrawlJob.PRIORITY_AVERAGE);
1038         h.getJobHandler().addJob(job);
1039         // Before we start, need to change some items in the settings file.
1040         CredentialStore cs = (CredentialStore)job.getSettingsHandler().
1041             getOrder().getAttribute(CredentialStore.ATTR_NAME);
1042         for (Iterator i = cs.iterator(null); i.hasNext();) {
1043             ((Credential)i.next()).setCredentialDomain(null, ROOTURI);
1044         }
1045         h.getJobHandler().startCrawler();
1046         StringBuffer buffer = new StringBuffer();
1047         buffer.append("Heritrix " + Heritrix.getVersion() +
1048                 " selftest started.");
1049         buffer.append("\nSelftest first crawls " + selfTestUrl +
1050             " and then runs an analysis.");
1051         buffer.append("\nResult of analysis printed to " +
1052             getHeritrixOut() + " when done.");
1053         buffer.append("\nSelftest job directory for logs and arcs:\n" +
1054             job.getDirectory().getAbsolutePath());
1055         return buffer.toString();
1056     }
1057 
1058     /***
1059      * Launch the crawler without a web UI and run the passed crawl only.
1060      * 
1061      * Specialized version of {@link #launch()}.
1062      *
1063      * @param crawlOrderFile The crawl order to crawl.
1064      * @throws InitializationException
1065      * @throws InvalidAttributeValueException
1066      * @return Status string.
1067      */
1068     protected String doOneCrawl(String crawlOrderFile)
1069     throws InitializationException, InvalidAttributeValueException {
1070         return doOneCrawl(crawlOrderFile, null);
1071     }
1072     
1073     /***
1074      * Launch the crawler without a web UI and run passed crawl only.
1075      * 
1076      * Specialized version of {@link #launch()}.
1077      *
1078      * @param crawlOrderFile The crawl order to crawl.
1079      * @param listener Register this crawl status listener before starting
1080      * crawl (You can use this listener to notice end-of-crawl).
1081      * @throws InitializationException
1082      * @throws InvalidAttributeValueException
1083      * @return Status string.
1084      */
1085     protected String doOneCrawl(String crawlOrderFile,
1086         CrawlStatusListener listener)
1087     throws InitializationException, InvalidAttributeValueException {
1088         XMLSettingsHandler handler =
1089             new XMLSettingsHandler(new File(crawlOrderFile));
1090         handler.initialize();
1091         CrawlController controller = new CrawlController();
1092         controller.initialize(handler);
1093         if (listener != null) {
1094             controller.addCrawlStatusListener(listener);
1095         }
1096         controller.requestCrawlStart();
1097         return "Crawl started using " + crawlOrderFile + ".";
1098     }
1099     
1100     /***
1101      * Launch the crawler for a web UI.
1102      *
1103      * Crawler hangs around waiting on jobs.
1104      *
1105      * @exception Exception
1106      * @return A status string describing how the launch went.
1107      * @throws Exception
1108      */
1109     public String launch() throws Exception {
1110         return launch(null, false);
1111     }
1112 
1113     /***
1114      * Launch the crawler for a web UI.
1115      *
1116      * Crawler hangs around waiting on jobs.
1117      * 
1118      * @param crawlOrderFile File to crawl.  May be null.
1119      * @param runMode Whether crawler should be set to run mode.
1120      *
1121      * @exception Exception
1122      * @return A status string describing how the launch went.
1123      */
1124     public String launch(String crawlOrderFile, boolean runMode)
1125     throws Exception {
1126         String status = null;
1127         if (crawlOrderFile != null) {
1128             addCrawlJob(crawlOrderFile, "Autolaunched", "", "");
1129             if(runMode) {
1130                 this.jobHandler.startCrawler();
1131                 status = "Job being crawled: " + crawlOrderFile;
1132             } else {
1133                 status = "Crawl job ready and pending: " + crawlOrderFile;
1134             }
1135         } else if(runMode) {
1136             // The use case is that jobs are to be run on a schedule and that
1137             // if the crawler is in run mode, then the scheduled job will be
1138             // run at appropriate time.  Otherwise, not.
1139             this.jobHandler.startCrawler();
1140             status = "Crawler set to run mode.";
1141         }
1142         return status;
1143     }
1144     
1145     /***
1146      * Start up the embedded Jetty webserver instance.
1147      * This is done when we're run from the command-line.
1148      * @param port Port number to use for web UI.
1149      * @param adminLoginPassword Compound of login and password.
1150      * @throws Exception
1151      * @return Status on webserver startup.
1152      * @deprecated  Use startEmbeddedWebserver(hosts, port, adminLoginPassword)
1153      */
1154     protected static String startEmbeddedWebserver(final int port,
1155         final boolean lho, final String adminLoginPassword)
1156     throws Exception {
1157         ArrayList<String> hosts = new ArrayList<String>();
1158         if (lho) {
1159             hosts.add("127.0.0.1");
1160         }
1161         return startEmbeddedWebserver(hosts, port, adminLoginPassword);
1162     }
1163 
1164     
1165     /***
1166      * Parses a list of host names.
1167      * 
1168      * <p>If the given string is <code>/</code>, then an empty
1169      * collection is returned.  This indicates that all available network
1170      * interfaces should be used.
1171      * 
1172      * <p>Otherwise, the string must contain a comma-separated list of 
1173      * IP addresses or host names.  The parsed list is then returned.
1174      * 
1175      * @param hosts  the string to parse
1176      * @return  the parsed collection of hosts 
1177      */
1178     private static Collection<String> parseHosts(String hosts) {
1179         hosts = hosts.trim();
1180         if (hosts.equals("/")) {
1181             return new ArrayList<String>(1);
1182         }
1183         String[] hostArray = hosts.split(",");
1184         for (int i = 0; i < hostArray.length; i++) {
1185             hostArray[i] = hostArray[i].trim();
1186         }
1187         return Arrays.asList(hostArray);
1188     }
1189     
1190     /***
1191      * Start up the embedded Jetty webserver instance.
1192      * This is done when we're run from the command-line.
1193      * 
1194      * @param hosts  a list of IP addresses or hostnames to bind to, or an
1195      *               empty collection to bind to all available network 
1196      *               interfaces
1197      * @param port Port number to use for web UI.
1198      * @param adminLoginPassword Compound of login and password.
1199      * @throws Exception
1200      * @return Status on webserver startup.
1201      */
1202     protected static String startEmbeddedWebserver(Collection<String> hosts, 
1203         int port, String adminLoginPassword) 
1204     throws Exception {
1205         adminUsername = adminLoginPassword.
1206             substring(0, adminLoginPassword.indexOf(":"));
1207         adminPassword = adminLoginPassword.
1208             substring(adminLoginPassword.indexOf(":") + 1);
1209         Heritrix.httpServer = new SimpleHttpServer("admin",
1210             Heritrix.adminContext, hosts, port, false);
1211         
1212         final String DOTWAR = ".war";
1213         final String SELFTEST = "selftest";
1214         
1215         // Look for additional WAR files beyond 'selftest' and 'admin'.
1216         File[] wars = getWarsdir().listFiles();
1217         for(int i = 0; i < wars.length; i++) {
1218             if(wars[i].isFile()) {
1219                 final String warName = wars[i].getName();
1220                 final String warNameNC = warName.toLowerCase();
1221                 if(warNameNC.endsWith(DOTWAR) &&
1222                         !warNameNC.equals(ADMIN + DOTWAR) &&
1223                         !warNameNC.equals(SELFTEST + DOTWAR)) {
1224                     int dot = warName.indexOf('.');
1225                     Heritrix.httpServer.addWebapp(warName.substring(0, dot),
1226                             null, true);
1227                 }
1228             }
1229         }
1230         
1231         // Name of passed 'realm' must match what is in configured in web.xml.
1232         // We'll use ROLE for 'realm' and 'role'.
1233         final String ROLE = ADMIN;
1234         Heritrix.httpServer.setAuthentication(ROLE, Heritrix.adminContext,
1235             adminUsername, adminPassword, ROLE);
1236         Heritrix.httpServer.startServer();
1237         StringBuffer buffer = new StringBuffer();
1238         buffer.append("Heritrix " + Heritrix.getVersion() + " is running.");
1239         for (String host: httpServer.getHosts()) {
1240             buffer.append("\nWeb console is at: http://");
1241             buffer.append(host).append(':').append(port);
1242         }
1243         buffer.append("\nWeb console login and password: " +
1244             adminUsername + "/" + adminPassword);
1245         return buffer.toString();
1246     }
1247     
1248     /***
1249      * Replace existing administrator login info with new info.
1250      * 
1251      * @param newUsername new administrator login username
1252      * @param newPassword new administrator login password
1253      */
1254     public static void resetAuthentication(String newUsername,
1255             String newPassword) {
1256         Heritrix.httpServer.resetAuthentication(ADMIN, adminUsername,
1257                 newUsername, newPassword);
1258         adminUsername = newUsername;
1259         adminPassword = newPassword; 
1260         logger.info("administrative login changed to "
1261                 +newUsername+":"+newPassword);
1262     }
1263 
1264     protected static CrawlJob createCrawlJob(CrawlJobHandler handler,
1265             File crawlOrderFile, String name)
1266     throws InvalidAttributeValueException {
1267         XMLSettingsHandler settings = new XMLSettingsHandler(crawlOrderFile);
1268         settings.initialize();
1269         return new CrawlJob(handler.getNextJobUID(), name, settings,
1270             new CrawlJobErrorHandler(Level.SEVERE),
1271             CrawlJob.PRIORITY_HIGH,
1272             crawlOrderFile.getAbsoluteFile().getParentFile());
1273     }
1274     
1275     /***
1276      * This method is called when we have an order file to hand that we want
1277      * to base a job on.  It leaves the order file in place and just starts up
1278      * a job that uses all the order points to for locations for logs, etc.
1279      * @param orderPathOrUrl Path to an order file or to a seeds file.
1280      * @param name Name to use for this job.
1281      * @param description 
1282      * @param seeds 
1283      * @return A status string.
1284      * @throws IOException 
1285      * @throws FatalConfigurationException 
1286      */
1287     public String addCrawlJob(String orderPathOrUrl, String name,
1288             String description, String seeds)
1289     throws IOException, FatalConfigurationException {
1290         if (!UURI.hasScheme(orderPathOrUrl)) {
1291             // Assume its a file path.
1292             return addCrawlJob(new File(orderPathOrUrl), name, description,
1293                     seeds);
1294         }
1295 
1296         // Otherwise, must be an URL.
1297         URL url = new URL(orderPathOrUrl);
1298 
1299         // Handle http and file only for now (Tried to handle JarUrlConnection
1300         // but too awkward undoing jar stream.  Rather just look for URLs that
1301         // end in '.jar').
1302         String result = null;
1303         URLConnection connection = url.openConnection();
1304         if (connection instanceof HttpURLConnection) {
1305             result = addCrawlJob(url, (HttpURLConnection)connection, name,
1306                 description, seeds);
1307         } else if (connection instanceof FileURLConnection) {
1308             result = addCrawlJob(new File(url.getPath()), name, description,
1309                 seeds);
1310         } else {
1311             throw new UnsupportedOperationException("No support for "
1312                 + connection);
1313         }
1314 
1315         return result;
1316     }
1317     
1318     protected String addCrawlJob(final URL url,
1319             final HttpURLConnection connection,
1320             final String name, final String description, final String seeds)
1321     throws IOException, FatalConfigurationException {
1322         // Look see if its a jar file.  If it is undo it.
1323         boolean isJar = url.getPath() != null &&
1324             url.getPath().toLowerCase().endsWith(JAR_SUFFIX);
1325         // If http url connection, bring down the resource local.
1326         File localFile = File.createTempFile(Heritrix.class.getName(),
1327            isJar? JAR_SUFFIX: null, TMPDIR);
1328         connection.connect();
1329         String result = null;
1330         try {
1331             IoUtils.readFullyToFile(connection.getInputStream(), localFile);
1332             result = addCrawlJob(localFile, name, description, seeds);
1333         } catch (IOException ioe) {
1334             // Cleanup if an Exception.
1335             localFile.delete();
1336             localFile = null;
1337         } finally {
1338              connection.disconnect();
1339              // If its a jar file, then we made a job based on the jar contents.
1340              // Its no longer needed.  Remove it.  If not a jar file, then leave
1341              // the file around because the job depends on it.
1342              if (isJar && localFile != null && localFile.exists()) {
1343                  localFile.delete();
1344              }
1345         }
1346         return result;
1347     }
1348     
1349     protected String addCrawlJob(final File order, final String name,
1350             final String description, final String seeds)
1351     throws FatalConfigurationException, IOException {
1352         CrawlJob addedJob = null;
1353         if (this.jobHandler == null) {
1354             throw new NullPointerException("Heritrix jobhandler is null.");
1355         }
1356         try {
1357             if (order.getName().toLowerCase().endsWith(JAR_SUFFIX)) {
1358                 return addCrawlJobBasedonJar(order, name, description, seeds);
1359             }
1360             addedJob = this.jobHandler.
1361                 addJob(createCrawlJob(this.jobHandler, order, name));
1362         } catch (InvalidAttributeValueException e) {
1363             FatalConfigurationException fce = new FatalConfigurationException(
1364                 "Converted InvalidAttributeValueException on " +
1365                 order.getAbsolutePath() + ": " + e.getMessage());
1366             fce.setStackTrace(e.getStackTrace());
1367         }
1368         return addedJob != null? addedJob.getUID(): null;
1369     }
1370     
1371     /***
1372      * Undo jar file and use as basis for a new job.
1373      * @param jarFile Pointer to file that holds jar.
1374      * @param name Name to use for new job.
1375      * @param description 
1376      * @param seeds 
1377      * @return Message.
1378      * @throws IOException
1379      * @throws FatalConfigurationException
1380      */
1381     protected String addCrawlJobBasedonJar(final File jarFile,
1382             final String name, final String description, final String seeds)
1383     throws IOException, FatalConfigurationException {
1384         if (jarFile == null || !jarFile.exists()) {
1385             throw new FileNotFoundException(jarFile.getAbsolutePath());
1386         }
1387         // Create a directory with a tmp name.  Do it by first creating file,
1388         // removing it, then creating the directory. There is a hole during
1389         // which the OS may put a file of same exact name in our way but
1390         // unlikely.
1391         File dir = File.createTempFile(Heritrix.class.getName(), ".expandedjar",
1392             TMPDIR);
1393         dir.delete();
1394         dir.mkdir();
1395         try {
1396             org.archive.crawler.util.IoUtils.unzip(jarFile, dir);
1397             // Expect to find an order file at least.
1398             File orderFile = new File(dir, "order.xml");
1399             if (!orderFile.exists()) {
1400                 throw new IOException("Missing order: " +
1401                     orderFile.getAbsolutePath());
1402             }
1403             CrawlJob job =
1404                 createCrawlJobBasedOn(orderFile, name, description, seeds);
1405             // Copy into place any seeds and settings directories before we
1406             // add job to Heritrix to crawl.
1407             File seedsFile = new File(dir, "seeds.txt");
1408             if (seedsFile.exists()) {
1409                 FileUtils.copyFiles(seedsFile, new File(job.getDirectory(),
1410                     seedsFile.getName()));
1411             }
1412             addCrawlJob(job);
1413             return job.getUID();
1414          } finally {
1415              // After job has been added, no more need of expanded content.
1416              // (Let the caller be responsible for cleanup of jar. Sometimes
1417              // its should be deleted -- when its a local copy of a jar pulled
1418              // across the net -- wherease other times, if its a jar passed
1419              // in w/ a 'file' scheme, it shouldn't be deleted.
1420              org.archive.util.FileUtils.deleteDir(dir);
1421          }
1422     }
1423     
1424     public String addCrawlJobBasedOn(String jobUidOrProfile,
1425             String name, String description, String seeds) {
1426         try {
1427             CrawlJob cj = getJobHandler().getJob(jobUidOrProfile);
1428             if (cj == null) {
1429                 throw new InvalidAttributeValueException(jobUidOrProfile +
1430                     " is not a job UID or profile name (Job UIDs are " +
1431                     " usually the 14 digit date portion of job name).");
1432             }
1433             CrawlJob job = addCrawlJobBasedOn(
1434                 cj.getSettingsHandler().getOrderFile(), name, description,
1435                     seeds);
1436             return job.getUID();
1437         } catch (Exception e) {
1438             e.printStackTrace();
1439             return "Exception on " + jobUidOrProfile + ": " + e.getMessage();
1440         } 
1441     }
1442     
1443     protected CrawlJob addCrawlJobBasedOn(final File orderFile,
1444         final String name, final String description, final String seeds)
1445     throws FatalConfigurationException {
1446         return addCrawlJob(createCrawlJobBasedOn(orderFile, name, description,
1447                 seeds));
1448     }
1449     
1450     protected CrawlJob createCrawlJobBasedOn(final File orderFile,
1451             final String name, final String description, final String seeds)
1452     throws FatalConfigurationException {
1453         CrawlJob job = getJobHandler().newJob(orderFile, name, description,
1454                 seeds);
1455         return CrawlJobHandler.ensureNewJobWritten(job, name, description);
1456     }
1457     
1458     protected CrawlJob addCrawlJob(final CrawlJob job) {
1459         return getJobHandler().addJob(job);
1460     }
1461     
1462     public void startCrawling() {
1463         if (getJobHandler() == null) {
1464             throw new NullPointerException("Heritrix jobhandler is null.");
1465         }
1466         getJobHandler().startCrawler();
1467     }
1468 
1469     public void stopCrawling() {
1470         if (getJobHandler() == null) {
1471             throw new NullPointerException("Heritrix jobhandler is null.");
1472         }
1473         getJobHandler().stopCrawler();
1474     }
1475     
1476     /***
1477      * Get the heritrix version.
1478      *
1479      * @return The heritrix version.  May be null.
1480      */
1481     public static String getVersion() {
1482         return System.getProperty("heritrix.version");
1483     }
1484 
1485     /***
1486      * Get the job handler
1487      *
1488      * @return The CrawlJobHandler being used.
1489      */
1490     public CrawlJobHandler getJobHandler() {
1491         return this.jobHandler;
1492     }
1493 
1494     /***
1495      * Get the configuration directory.
1496      * @return The conf directory under HERITRIX_HOME or null if none can
1497      * be found.
1498      * @throws IOException
1499      */
1500     public static File getConfdir()
1501     throws IOException {
1502         return getConfdir(true);
1503     }
1504 
1505     /***
1506      * Get the configuration directory.
1507      * @param fail Throw IOE if can't find directory if true, else just
1508      * return null.
1509      * @return The conf directory under HERITRIX_HOME or null (or an IOE) if
1510      * can't be found.
1511      * @throws IOException
1512      */
1513     public static File getConfdir(final boolean fail)
1514     throws IOException {
1515         final String key = "heritrix.conf";
1516         // Look to see if heritrix.conf property passed on the cmd-line.
1517         String tmp = System.getProperty(key);
1518         // if not fall back to default $HERITIX_HOME/conf
1519         if (tmp == null || tmp.length() == 0) {
1520             return getSubDir("conf", fail);
1521         }
1522         File dir = new File(tmp);
1523         if (!dir.exists()) {
1524             if (fail) {
1525                 throw new IOException("Cannot find conf dir: " + tmp);
1526             } else {
1527                 logger.log(Level.WARNING, "Specified " + key +
1528                     " dir does not exist.  Falling back on default");
1529             }
1530             dir = getSubDir("conf", fail);
1531         }
1532         return dir;
1533     }
1534 
1535     /***
1536      * @return Returns the httpServer. May be null if one was not started.
1537      */
1538     public static SimpleHttpServer getHttpServer() {
1539         return Heritrix.httpServer;
1540     }
1541 
1542     /***
1543      * @throws IOException
1544      * @return Returns the directory under which reside the WAR files
1545      * we're to load into the servlet container.
1546      */
1547     public static File getWarsdir()
1548     throws IOException {
1549         return getSubDir("webapps");
1550     }
1551 
1552     /***
1553      * Prepars for program shutdown. This method does it's best to prepare the
1554      * program so that it can exit normally. It will kill the httpServer and
1555      * terminate any running job.<br>
1556      * It is advisible to wait a few (~1000) millisec after calling this method
1557      * and before calling performHeritrixShutDown() to allow as many threads as
1558      * possible to finish what they are doing.
1559      */
1560     public static void prepareHeritrixShutDown() {
1561         // Stop and destroy all running Heritrix instances.
1562         // Get array of the key set to avoid CCEs for case where call to
1563         // destroy does a remove of an instance from Heritrix.instances.
1564         final Object [] keys = Heritrix.instances.keySet().toArray();
1565         for (int i = 0; i < keys.length; i++) {
1566             ((Heritrix)Heritrix.instances.get(keys[i])).destroy();
1567         }
1568         
1569         try {
1570             deregisterJndi(getJndiContainerName());
1571         } catch (NameNotFoundException e) {
1572             // We were probably unbound already. Ignore.
1573             logger.log(Level.WARNING, "deregistration of jndi", e);
1574         } catch (Exception e) {
1575             e.printStackTrace();
1576         }
1577         
1578         if(Heritrix.httpServer != null) {
1579             // Shut down the web access.
1580             try {
1581                 Heritrix.httpServer.stopServer();
1582             } catch (InterruptedException e) {
1583                 // Generally this can be ignored, but we'll print a stack trace
1584                 // just in case.
1585                 e.printStackTrace();
1586             } finally {
1587                 Heritrix.httpServer = null;
1588             }
1589         }
1590     }
1591 
1592     /***
1593      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1594      * prior to this method.
1595      */
1596     public static void performHeritrixShutDown() {
1597         performHeritrixShutDown(0);
1598     }
1599 
1600     /***
1601      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1602      * prior to this method.
1603      *
1604      * @param exitCode Code to pass System.exit.
1605      *
1606      */
1607     public static void performHeritrixShutDown(int exitCode) {
1608         System.exit(exitCode);
1609     }
1610 
1611     /***
1612      * Shutdown all running heritrix instances and the JVM.
1613      * Assumes stop has already been called.
1614 	 * @param exitCode Exit code to pass system exit.
1615 	 */
1616 	public static void shutdown(final int exitCode) {
1617         getShutdownThread(true, exitCode, "Heritrix shutdown").start();
1618 	}
1619     
1620     protected static Thread getShutdownThread(final boolean sysexit,
1621             final int exitCode, final String name) {
1622         Thread t = new Thread(name) {
1623             public void run() {
1624                 Heritrix.prepareHeritrixShutDown();
1625                 if (sysexit) {
1626                     Heritrix.performHeritrixShutDown(exitCode);
1627                 }
1628             }
1629         };
1630         t.setDaemon(true);
1631         return t;
1632     }
1633     
1634     public static void shutdown() {
1635         shutdown(0);
1636     }
1637     
1638     /***
1639      * Register Heritrix with JNDI, JMX, and with the static hashtable of all
1640      * Heritrix instances known to this JVM.
1641      * 
1642      * If launched from cmdline, register Heritrix MBean if an agent to register
1643      * ourselves with. Usually this method will only have effect if we're
1644      * running in a 1.5.0 JDK and command line options such as
1645      * '-Dcom.sun.management.jmxremote.port=8082
1646      * -Dcom.sun.management.jmxremote.authenticate=false
1647      * -Dcom.sun.management.jmxremote.ssl=false' are supplied.
1648      * See <a href="http://java.sun.com/j2se/1.5.0/docs/guide/management/agent.html">Monitoring
1649      * and Management Using JMX</a>
1650      * for more on the command line options and how to connect to the
1651      * Heritrix bean using the JDK 1.5.0 jconsole tool.  We register currently
1652      * with first server we find (TODO: Make configurable).
1653      * 
1654      * <p>If we register successfully with a JMX agent, then part of the
1655      * registration will include our registering ourselves with JNDI.
1656      * 
1657      * <p>Finally, add the heritrix instance to the hashtable of all the
1658      * Heritrix instances floating in the current VM.  This latter registeration
1659      * happens whether or no there is a JMX agent to register with.  This is
1660      * a list we keep out of convenience so its easy iterating over all
1661      *  all instances calling stop when main application is going down.
1662      * 
1663      * @param h Instance of heritrix to register.
1664      * @param name Name to use for this Heritrix instance.
1665      * @param jmxregister True if we are to register this instance with JMX.
1666      * @throws NullPointerException
1667      * @throws MalformedObjectNameException
1668      * @throws NotCompliantMBeanException 
1669      * @throws MBeanRegistrationException 
1670      * @throws InstanceAlreadyExistsException 
1671      */
1672     protected static void registerHeritrix(final Heritrix h,
1673             final String name, final boolean jmxregister)
1674     throws MalformedObjectNameException, InstanceAlreadyExistsException,
1675     MBeanRegistrationException, NotCompliantMBeanException {
1676         MBeanServer server = getMBeanServer();
1677         if (server != null) {
1678             // Are we to manage the jmx registration?  Or is it being done for
1679             // us by an external process: e.g. This instance was created by
1680             // MBeanAgent.
1681             if (jmxregister) {
1682                 ObjectName objName = (name == null || name.length() <= 0)?
1683                     getJmxObjectName(): getJmxObjectName(name);
1684                 registerMBean(server, h, objName);
1685             }
1686         } else {
1687             // JMX ain't available. Put this instance into the list of Heritrix
1688             // instances so findable by the UI (Normally this is done in the
1689             // JMX postRegister routine below).  When no JMX, can only have
1690             // one instance of Heritrix so no need to do the deregisteration.
1691             Heritrix.instances.put(h.getNoJmxName(), h);
1692         }
1693     }
1694     
1695     protected static void unregisterHeritrix(final Heritrix h)
1696     throws InstanceNotFoundException, MBeanRegistrationException,
1697             NullPointerException {
1698         MBeanServer server = getMBeanServer();
1699         if (server != null) {
1700             server.unregisterMBean(h.mbeanName);
1701         } else {
1702             // JMX ain't available. Remove from list of Heritrix instances.
1703             // Usually this is done by the JMX postDeregister below.
1704             Heritrix.instances.remove(h.getNoJmxName());
1705         }
1706     }
1707     
1708     /***
1709      * Get MBeanServer.
1710      * Currently uses first MBeanServer found.  This will definetly not be whats
1711      * always wanted. TODO: Make which server settable. Also, if none, put up
1712      * our own MBeanServer.
1713      * @return An MBeanServer to register with or null.
1714      */
1715     public static MBeanServer getMBeanServer() {
1716         MBeanServer result = null;
1717         List servers = MBeanServerFactory.findMBeanServer(null);
1718         if (servers == null) {
1719             return result;
1720         }
1721         for (Iterator i = servers.iterator(); i.hasNext();) {
1722             MBeanServer server = (MBeanServer)i.next();
1723             if (server == null) {
1724                 continue;
1725             }
1726             result = server;
1727             break;
1728         }
1729         return result;
1730     }
1731     
1732     public static MBeanServer registerMBean(final Object objToRegister,
1733             final String name, final String type)
1734     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1735     NotCompliantMBeanException {
1736         MBeanServer server = getMBeanServer();
1737         if (server != null) {
1738             server = registerMBean(server, objToRegister, name, type);
1739         }
1740         return server;
1741     }
1742     
1743     public static MBeanServer registerMBean(final MBeanServer server,
1744             final Object objToRegister, final String name, final String type)
1745     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1746     NotCompliantMBeanException {
1747         try {
1748             Hashtable<String,String> ht = new Hashtable<String,String>();
1749             ht.put(JmxUtils.NAME, name);
1750             ht.put(JmxUtils.TYPE, type);
1751             registerMBean(server, objToRegister,
1752                 new ObjectName(CRAWLER_PACKAGE, ht));
1753         } catch (MalformedObjectNameException e) {
1754             e.printStackTrace();
1755         }
1756         return server;
1757     }
1758         
1759     public static MBeanServer registerMBean(final MBeanServer server,
1760                 final Object objToRegister, final ObjectName objName)
1761     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1762     NotCompliantMBeanException {
1763         server.registerMBean(objToRegister, objName);
1764         return server;
1765     }
1766     
1767     public static void unregisterMBean(final MBeanServer server,
1768             final String name, final String type) {
1769         if (server == null) {
1770             return;
1771         }
1772         try {
1773             unregisterMBean(server, getJmxObjectName(name, type));
1774         } catch (MalformedObjectNameException e) {
1775             e.printStackTrace();
1776         }
1777     }
1778             
1779     public static void unregisterMBean(final MBeanServer server,
1780             final ObjectName name) {
1781         try {
1782             server.unregisterMBean(name);
1783             logger.info("Unregistered bean " + name.getCanonicalName());
1784         } catch (InstanceNotFoundException e) {
1785             e.printStackTrace();
1786         } catch (MBeanRegistrationException e) {
1787             e.printStackTrace();
1788         } catch (NullPointerException e) {
1789             e.printStackTrace();
1790         }
1791     }
1792     
1793     /***
1794      * @return Name to use when no JMX agent available.
1795      */
1796     protected String getNoJmxName() {
1797         return this.getClass().getName();
1798     }
1799     
1800     public static ObjectName getJmxObjectName()
1801     throws MalformedObjectNameException, NullPointerException {
1802         return getJmxObjectName("Heritrix", JmxUtils.SERVICE);
1803     }
1804     
1805     public static ObjectName getJmxObjectName(final String name)
1806     throws MalformedObjectNameException, NullPointerException {
1807         return getJmxObjectName(name, JmxUtils.SERVICE);
1808     }
1809     
1810     public static ObjectName getJmxObjectName(final String name,
1811             final String type)
1812     throws MalformedObjectNameException, NullPointerException {
1813         Hashtable<String,String> ht = new Hashtable<String,String>();
1814         ht.put(JmxUtils.NAME, name);
1815         ht.put(JmxUtils.TYPE, type);
1816         return new ObjectName(CRAWLER_PACKAGE, ht);
1817     }
1818     
1819     /***
1820      * @return Returns true if Heritrix was launched from the command line.
1821      * (When launched from command line, we do stuff like put up a web server
1822      * to manage our web interface and we register ourselves with the first
1823      * available jmx agent).
1824      */
1825     public static boolean isCommandLine() {
1826         return Heritrix.commandLine;
1827     }
1828     
1829     /***
1830      * @return True if heritrix has been started.
1831      */
1832     public boolean isStarted() {
1833         return this.jobHandler != null;
1834     }
1835     
1836     public String getStatus() {
1837         StringBuffer buffer = new StringBuffer();
1838         if (this.getJobHandler() != null) {
1839             buffer.append("isRunning=");
1840             buffer.append(this.getJobHandler().isRunning());
1841             buffer.append(" isCrawling=");
1842             buffer.append(this.getJobHandler().isCrawling());
1843             buffer.append(" alertCount=");
1844             buffer.append(getAlertsCount());
1845             buffer.append(" newAlertCount=");
1846             buffer.append(getNewAlertsCount());
1847             if (this.getJobHandler().isCrawling()) {
1848                 buffer.append(" currentJob=");
1849                 buffer.append(this.getJobHandler().getCurrentJob().
1850                     getJmxJobName());
1851             }
1852         }
1853         return buffer.toString();
1854     }
1855     
1856     // Alert methods.
1857     public int getAlertsCount() {
1858         return this.alertManager.getCount();
1859     }
1860     
1861     public int getNewAlertsCount() {
1862         return this.alertManager.getNewCount();
1863     }
1864     
1865     public Vector getAlerts() {
1866         return this.alertManager.getAll();
1867     }
1868     
1869     public Vector getNewAlerts() {
1870         return this.alertManager.getNewAll();
1871     }
1872     
1873     public SinkHandlerLogRecord getAlert(final String id) {
1874         return this.alertManager.get(id);
1875     }
1876     
1877     public void readAlert(final String id) {
1878         this.alertManager.read(id);
1879     }
1880     
1881     public void removeAlert(final String id) {
1882         this.alertManager.remove(id);
1883     }
1884     
1885     /***
1886      * Start Heritrix.
1887      * 
1888      * Used by JMX and webapp initialization for starting Heritrix.
1889      * Not by the cmdline launched Heritrix. Idempotent.
1890      * If start is called by JMX, then new instance of Heritrix is automatically
1891      * registered w/ JMX Agent.  If started by webapp, need to register the new
1892      * Heritrix instance.
1893      */
1894     public void start() {
1895         // Don't start if we've been launched from the command line.
1896         // Don't start if already started.
1897         if (!Heritrix.isCommandLine() && !isStarted()) {
1898             try {
1899                 logger.info(launch());
1900             } catch (Exception e) {
1901                 e.printStackTrace();
1902             }
1903         }
1904     }
1905     
1906     /***
1907      * Stop Heritrix.
1908      * 
1909      * Used by JMX and webapp initialization for stopping Heritrix.
1910      */
1911     public void stop() {
1912         if (this.jobHandler != null) {
1913             this.jobHandler.stop();
1914         }
1915     }
1916 
1917     public String interrupt(String threadName) {
1918         String result = "Thread " + threadName + " not found";
1919         ThreadGroup group = Thread.currentThread().getThreadGroup();
1920         if (group == null) {
1921             return result;
1922         }
1923         // Back up to the root threadgroup before starting
1924         // to iterate over threads.
1925         ThreadGroup parent = null;
1926         while((parent = group.getParent()) != null) {
1927             group = parent;
1928         }
1929         // Do an array that is twice the size of active
1930         // thread count.  That should be big enough.
1931         final int max = group.activeCount() * 2;
1932         Thread [] threads = new Thread[max];
1933         int threadCount = group.enumerate(threads, true);
1934         if (threadCount >= max) {
1935             logger.info("Some threads not found...array too small: " +
1936                 max);
1937         }
1938         for (int j = 0; j < threadCount; j++) {
1939             if (threads[j].getName().equals(threadName)) {
1940                 threads[j].interrupt();
1941                 result = "Interrupt sent to " + threadName;
1942                 break;
1943             }
1944         }
1945         return result;
1946     }
1947 
1948     // OpenMBean implementation.
1949     
1950     /***
1951      * Build up the MBean info for Heritrix main.
1952      * @return Return created mbean info instance.
1953      */
1954     protected OpenMBeanInfoSupport buildMBeanInfo() {
1955         OpenMBeanAttributeInfoSupport[] attributes =
1956             new OpenMBeanAttributeInfoSupport[Heritrix.ATTRIBUTE_LIST.size()];
1957         OpenMBeanConstructorInfoSupport[] constructors =
1958             new OpenMBeanConstructorInfoSupport[1];
1959         OpenMBeanOperationInfoSupport[] operations =
1960             new OpenMBeanOperationInfoSupport[Heritrix.OPERATION_LIST.size()];
1961         MBeanNotificationInfo[] notifications =
1962             new MBeanNotificationInfo[0];
1963 
1964         // Attributes.
1965         attributes[0] =
1966             new OpenMBeanAttributeInfoSupport(Heritrix.STATUS_ATTR,
1967                 "Short basic status message", SimpleType.STRING, true,
1968                 false, false);
1969         // Attributes.
1970         attributes[1] =
1971             new OpenMBeanAttributeInfoSupport(Heritrix.VERSION_ATTR,
1972                 "Heritrix version", SimpleType.STRING, true, false, false);
1973         // Attributes.
1974         attributes[2] =
1975             new OpenMBeanAttributeInfoSupport(Heritrix.ISRUNNING_ATTR,
1976                 "Whether the crawler is running", SimpleType.BOOLEAN, true,
1977                 false, false);
1978         // Attributes.
1979         attributes[3] =
1980             new OpenMBeanAttributeInfoSupport(Heritrix.ISCRAWLING_ATTR,
1981                 "Whether the crawler is crawling", SimpleType.BOOLEAN, true,
1982                 false, false);
1983         // Attributes.
1984         attributes[4] =
1985             new OpenMBeanAttributeInfoSupport(Heritrix.ALERTCOUNT_ATTR,
1986                 "The number of alerts", SimpleType.INTEGER, true, false, false);
1987         // Attributes.
1988         attributes[5] =
1989             new OpenMBeanAttributeInfoSupport(Heritrix.NEWALERTCOUNT_ATTR,
1990                 "The number of new alerts", SimpleType.INTEGER, true, false,
1991                 false);
1992         // Attributes.
1993         attributes[6] =
1994             new OpenMBeanAttributeInfoSupport(Heritrix.CURRENTJOB_ATTR,
1995                 "The name of the job currently being crawled", 
1996                 SimpleType.STRING, true, false, false);
1997 
1998         // Constructors.
1999         constructors[0] = new OpenMBeanConstructorInfoSupport(
2000             "HeritrixOpenMBean", "Constructs Heritrix OpenMBean instance ",
2001             new OpenMBeanParameterInfoSupport[0]);
2002 
2003         // Operations.
2004         operations[0] = new OpenMBeanOperationInfoSupport(
2005             Heritrix.START_OPER, "Start Heritrix instance", null,
2006                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2007         
2008         operations[1] = new OpenMBeanOperationInfoSupport(
2009             Heritrix.STOP_OPER, "Stop Heritrix instance", null,
2010                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2011         
2012         OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[1];
2013         args[0] = new OpenMBeanParameterInfoSupport("threadName",
2014             "Name of thread to send interrupt", SimpleType.STRING);
2015         operations[2] = new OpenMBeanOperationInfoSupport(
2016             Heritrix.INTERRUPT_OPER, "Send thread an interrupt " +
2017                 "(Used debugging)", args, SimpleType.STRING,
2018                 MBeanOperationInfo.ACTION_INFO);
2019         
2020         operations[3] = new OpenMBeanOperationInfoSupport(
2021             Heritrix.START_CRAWLING_OPER, "Set Heritrix instance " +
2022                 "into crawling mode", null, SimpleType.VOID,
2023                 MBeanOperationInfo.ACTION);
2024         
2025         operations[4] = new OpenMBeanOperationInfoSupport(
2026             Heritrix.STOP_CRAWLING_OPER, "Unset Heritrix instance " +
2027                 " crawling mode", null, SimpleType.VOID,
2028                 MBeanOperationInfo.ACTION);
2029         
2030         args = new OpenMBeanParameterInfoSupport[4];
2031         args[0] = new OpenMBeanParameterInfoSupport("pathOrURL",
2032             "Path/URL to order or jar of order+seed",
2033             SimpleType.STRING);
2034         args[1] = new OpenMBeanParameterInfoSupport("name",
2035             "Basename for new job", SimpleType.STRING);
2036         args[2] = new OpenMBeanParameterInfoSupport("description",
2037             "Description to save with new job", SimpleType.STRING);
2038         args[3] = new OpenMBeanParameterInfoSupport("seeds",
2039             "Initial seed(s)", SimpleType.STRING);
2040         operations[5] = new OpenMBeanOperationInfoSupport(
2041             Heritrix.ADD_CRAWL_JOB_OPER, "Add new crawl job", args,
2042                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2043         
2044         args = new OpenMBeanParameterInfoSupport[4];
2045         args[0] = new OpenMBeanParameterInfoSupport("uidOrName",
2046             "Job UID or profile name", SimpleType.STRING);
2047         args[1] = new OpenMBeanParameterInfoSupport("name",
2048             "Basename for new job", SimpleType.STRING);
2049         args[2] = new OpenMBeanParameterInfoSupport("description",
2050             "Description to save with new job", SimpleType.STRING);
2051         args[3] = new OpenMBeanParameterInfoSupport("seeds",
2052             "Initial seed(s)", SimpleType.STRING);
2053         operations[6] = new OpenMBeanOperationInfoSupport(
2054             Heritrix.ADD_CRAWL_JOB_BASEDON_OPER,
2055             "Add a new crawl job based on passed Job UID or profile",
2056             args, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2057         
2058         args = new OpenMBeanParameterInfoSupport[1];
2059         args[0] = new OpenMBeanParameterInfoSupport("UID",
2060             "Job UID", SimpleType.STRING);
2061         operations[7] = new OpenMBeanOperationInfoSupport(DELETE_CRAWL_JOB_OPER,
2062             "Delete/stop this crawl job", args, SimpleType.VOID,
2063             MBeanOperationInfo.ACTION);
2064         
2065         args = new OpenMBeanParameterInfoSupport[1];
2066         args[0] = new OpenMBeanParameterInfoSupport("index",
2067             "Zero-based index into array of alerts", SimpleType.INTEGER);
2068         operations[8] = new OpenMBeanOperationInfoSupport(
2069             Heritrix.ALERT_OPER, "Return alert at passed index", args,
2070                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2071         
2072         try {
2073             this.jobCompositeType = new CompositeType("job",
2074                     "Job attributes", JOB_KEYS,
2075                     new String [] {"Job unique ID", "Job name", "Job status"},
2076                     new OpenType [] {SimpleType.STRING, SimpleType.STRING,
2077                         SimpleType.STRING});
2078             this.jobsTabularType = new TabularType("jobs", "List of jobs",
2079                     this.jobCompositeType, new String [] {"uid"});
2080         } catch (OpenDataException e) {
2081             // This should never happen.
2082             throw new RuntimeException(e);
2083         }
2084         operations[9] = new OpenMBeanOperationInfoSupport(
2085             Heritrix.PENDING_JOBS_OPER,
2086                 "List of pending jobs (or null if none)", null,
2087                 this.jobsTabularType, MBeanOperationInfo.INFO);
2088         operations[10] = new OpenMBeanOperationInfoSupport(
2089                 Heritrix.COMPLETED_JOBS_OPER,
2090                     "List of completed jobs (or null if none)", null,
2091                     this.jobsTabularType, MBeanOperationInfo.INFO);
2092         
2093         args = new OpenMBeanParameterInfoSupport[2];
2094         args[0] = new OpenMBeanParameterInfoSupport("uid",
2095             "Job unique ID", SimpleType.STRING);
2096         args[1] = new OpenMBeanParameterInfoSupport("name",
2097                 "Report name (e.g. crawl-report, etc.)",
2098                 SimpleType.STRING);
2099         operations[11] = new OpenMBeanOperationInfoSupport(
2100             Heritrix.CRAWLEND_REPORT_OPER, "Return crawl-end report", args,
2101                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2102         
2103         operations[12] = new OpenMBeanOperationInfoSupport(
2104             Heritrix.SHUTDOWN_OPER, "Shutdown container", null,
2105                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2106         
2107         args = new OpenMBeanParameterInfoSupport[2];
2108         args[0] = new OpenMBeanParameterInfoSupport("level",
2109             "Log level: e.g. SEVERE, WARNING, etc.", SimpleType.STRING);
2110         args[1] = new OpenMBeanParameterInfoSupport("message",
2111             "Log message", SimpleType.STRING);
2112         operations[13] = new OpenMBeanOperationInfoSupport(Heritrix.LOG_OPER,
2113             "Add a log message", args, SimpleType.VOID,
2114             MBeanOperationInfo.ACTION);
2115         
2116         operations[14] = new OpenMBeanOperationInfoSupport(
2117             Heritrix.DESTROY_OPER, "Destroy Heritrix instance", null,
2118                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2119         
2120         operations[15] = new OpenMBeanOperationInfoSupport(
2121             Heritrix.TERMINATE_CRAWL_JOB_OPER,
2122             "Returns false if no current job", null, SimpleType.BOOLEAN,
2123             MBeanOperationInfo.ACTION);
2124         
2125         operations[16] = new OpenMBeanOperationInfoSupport(
2126             Heritrix.REBIND_JNDI_OPER,
2127             "Rebinds this Heritrix with JNDI.", null,
2128             SimpleType.VOID, MBeanOperationInfo.ACTION);
2129 
2130         // Build the info object.
2131         return new OpenMBeanInfoSupport(this.getClass().getName(),
2132             "Heritrix Main OpenMBean", attributes, constructors, operations,
2133             notifications);
2134     }
2135     
2136     public Object getAttribute(String attribute_name)
2137     throws AttributeNotFoundException {
2138         if (attribute_name == null) {
2139             throw new RuntimeOperationsException(
2140                  new IllegalArgumentException("Attribute name cannot be null"),
2141                  "Cannot call getAttribute with null attribute name");
2142         }
2143         if (!Heritrix.ATTRIBUTE_LIST.contains(attribute_name)) {
2144             throw new AttributeNotFoundException("Attribute " +
2145                  attribute_name + " is unimplemented.");
2146         }
2147         // The pattern in the below is to match an attribute and when found
2148         // do a return out of if clause.  Doing it this way, I can fall
2149         // on to the AttributeNotFoundException for case where we've an
2150         // attribute but no handler.
2151         if (attribute_name.equals(STATUS_ATTR)) {
2152             return getStatus();
2153         }
2154         if (attribute_name.equals(VERSION_ATTR)) {
2155             return getVersion();
2156         }
2157 
2158         if (attribute_name.equals(ISRUNNING_ATTR)) {
2159             return new Boolean(this.getJobHandler().isRunning());
2160         }
2161         if (attribute_name.equals(ISCRAWLING_ATTR)) {
2162             return new Boolean(this.getJobHandler().isCrawling());
2163         }
2164         if (attribute_name.equals(ALERTCOUNT_ATTR)) {
2165             return new Integer(getAlertsCount());
2166         }
2167         if (attribute_name.equals(NEWALERTCOUNT_ATTR)) {
2168             return new Integer(getNewAlertsCount());
2169         }
2170         if (attribute_name.equals(CURRENTJOB_ATTR)) {
2171             if (this.getJobHandler().isCrawling()) {
2172                 return this.getJobHandler().getCurrentJob().getJmxJobName();
2173             }
2174             return null;
2175         }
2176         throw new AttributeNotFoundException("Attribute " +
2177             attribute_name + " not found.");
2178     }
2179 
2180     public void setAttribute(Attribute attribute)
2181     throws AttributeNotFoundException {
2182         throw new AttributeNotFoundException("No attribute can be set in " +
2183             "this MBean");
2184     }
2185 
2186     public AttributeList getAttributes(String [] attributeNames) {
2187         if (attributeNames == null) {
2188             throw new RuntimeOperationsException(
2189                 new IllegalArgumentException("attributeNames[] cannot be " +
2190                 "null"), "Cannot call getAttributes with null attribute " +
2191                 "names");
2192         }
2193         AttributeList resultList = new AttributeList();
2194         if (attributeNames.length == 0) {
2195             return resultList;
2196         }
2197         for (int i = 0; i < attributeNames.length; i++) {
2198             try {
2199                 Object value = getAttribute(attributeNames[i]);
2200                 resultList.add(new Attribute(attributeNames[i], value));
2201             } catch (Exception e) {
2202                 e.printStackTrace();
2203             }
2204         }
2205         return(resultList);
2206     }
2207 
2208     public AttributeList setAttributes(AttributeList attributes) {
2209         return new AttributeList(); // always empty
2210     }
2211 
2212     public Object invoke(final String operationName, final Object[] params,
2213         final String[] signature)
2214     throws ReflectionException {
2215         if (operationName == null) {
2216             throw new RuntimeOperationsException(
2217                 new IllegalArgumentException("Operation name cannot be null"),
2218                 "Cannot call invoke with null operation name");
2219         }
2220         // INFO logging of JMX invokes: [#HER-907]
2221         if (logger.isLoggable(Level.INFO)) {
2222             String paramsString = "";
2223             for (Object o : params) {
2224                 paramsString.concat("[" + o.toString() + "]");
2225             }
2226             logger.info("JMX invoke: " + operationName + " [" + paramsString
2227                     + "]");
2228         } 
2229         // The pattern in the below is to match an operation and when found
2230         // do a return out of if clause.  Doing it this way, I can fall
2231         // on to the MethodNotFoundException for case where we've an
2232         // attribute but no handler.
2233         if (operationName.equals(START_OPER)) {
2234             JmxUtils.checkParamsCount(START_OPER, params, 0);
2235             start();
2236             return null;
2237         }
2238         if (operationName.equals(STOP_OPER)) {
2239             JmxUtils.checkParamsCount(STOP_OPER, params, 0);
2240             stop();
2241             return null;
2242         }
2243         if (operationName.equals(DESTROY_OPER)) {
2244             JmxUtils.checkParamsCount(DESTROY_OPER, params, 0);
2245             destroy();
2246             return null;
2247         }
2248         if (operationName.equals(TERMINATE_CRAWL_JOB_OPER)) {
2249             JmxUtils.checkParamsCount(TERMINATE_CRAWL_JOB_OPER, params, 0);
2250             return new Boolean(this.jobHandler.terminateCurrentJob());
2251         }
2252         if (operationName.equals(REBIND_JNDI_OPER)) {
2253             JmxUtils.checkParamsCount(REBIND_JNDI_OPER, params, 0);
2254             try {
2255 				registerContainerJndi();
2256 			} catch (MalformedObjectNameException e) {
2257 				throw new RuntimeOperationsException(new RuntimeException(e));
2258 			} catch (UnknownHostException e) {
2259 				throw new RuntimeOperationsException(new RuntimeException(e));
2260 			} catch (NamingException e) {
2261 				throw new RuntimeOperationsException(new RuntimeException(e));
2262 			}
2263             return null;
2264         }
2265         if (operationName.equals(SHUTDOWN_OPER)) {
2266             JmxUtils.checkParamsCount(SHUTDOWN_OPER, params, 0);
2267             Heritrix.shutdown();
2268             return null;
2269         }
2270         if (operationName.equals(LOG_OPER)) {
2271             JmxUtils.checkParamsCount(LOG_OPER, params, 2);
2272             logger.log(Level.parse((String)params[0]), (String)params[1]);
2273             return null;
2274         }
2275         if (operationName.equals(INTERRUPT_OPER)) {
2276             JmxUtils.checkParamsCount(INTERRUPT_OPER, params, 1);
2277             return interrupt((String)params[0]);
2278         }       
2279         if (operationName.equals(START_CRAWLING_OPER)) {
2280             JmxUtils.checkParamsCount(START_CRAWLING_OPER, params, 0);
2281             startCrawling();
2282             return null;
2283         }
2284         if (operationName.equals(STOP_CRAWLING_OPER)) {
2285             JmxUtils.checkParamsCount(STOP_CRAWLING_OPER, params, 0);
2286             stopCrawling();
2287             return null;
2288         }
2289         if (operationName.equals(ADD_CRAWL_JOB_OPER)) {
2290             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_OPER, params, 4);
2291             try {
2292                 return addCrawlJob((String)params[0], (String)params[1],
2293                     checkForEmptyPlaceHolder((String)params[2]),
2294                     checkForEmptyPlaceHolder((String)params[3]));
2295             } catch (IOException e) {
2296                 throw new RuntimeOperationsException(new RuntimeException(e));
2297             } catch (FatalConfigurationException e) {
2298                 throw new RuntimeOperationsException(new RuntimeException(e));
2299             }
2300         }
2301         if (operationName.equals(DELETE_CRAWL_JOB_OPER)) {
2302             JmxUtils.checkParamsCount(DELETE_CRAWL_JOB_OPER, params, 1);
2303             this.jobHandler.deleteJob((String)params[0]);
2304             return null;
2305         }
2306         
2307         if (operationName.equals(ADD_CRAWL_JOB_BASEDON_OPER)) {
2308             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_BASEDON_OPER, params, 4);
2309             return addCrawlJobBasedOn((String)params[0], (String)params[1],
2310                     checkForEmptyPlaceHolder((String)params[2]),
2311                     checkForEmptyPlaceHolder((String)params[3]));
2312         }       
2313         if (operationName.equals(ALERT_OPER)) {
2314             JmxUtils.checkParamsCount(ALERT_OPER, params, 1);
2315             SinkHandlerLogRecord slr = null;
2316             if (this.alertManager.getCount() > 0) {
2317                 // This is creating a vector of all alerts just so I can then
2318                 // use passed index into resultant vector -- needs to be
2319                 // improved.
2320                 slr = (SinkHandlerLogRecord)this.alertManager.getAll().
2321                     get(((Integer)params[0]).intValue());
2322             }
2323             return (slr != null)? slr.toString(): null;
2324         }
2325         
2326         if (operationName.equals(PENDING_JOBS_OPER)) {
2327                 JmxUtils.checkParamsCount(PENDING_JOBS_OPER, params, 0);
2328             try {
2329                 return makeJobsTabularData(getJobHandler().getPendingJobs());
2330             } catch (OpenDataException e) {
2331                 throw new RuntimeOperationsException(new RuntimeException(e));
2332             }
2333         }
2334         
2335         if (operationName.equals(COMPLETED_JOBS_OPER)) {
2336                 JmxUtils.checkParamsCount(COMPLETED_JOBS_OPER, params, 0);
2337             try {
2338                 return makeJobsTabularData(getJobHandler().getCompletedJobs());
2339             } catch (OpenDataException e) {
2340                 throw new RuntimeOperationsException(new RuntimeException(e));
2341             }
2342         }
2343         
2344         if (operationName.equals(CRAWLEND_REPORT_OPER)) {
2345             JmxUtils.checkParamsCount(CRAWLEND_REPORT_OPER, params, 2);
2346             try {
2347                 return getCrawlendReport((String)params[0], (String) params[1]);
2348             } catch (IOException e) {
2349                 throw new RuntimeOperationsException(new RuntimeException(e));
2350             }
2351         }
2352         
2353         throw new ReflectionException(
2354             new NoSuchMethodException(operationName),
2355                 "Cannot find the operation " + operationName);
2356     }
2357     
2358     /***
2359      * Return named crawl end report for job with passed uid.
2360      * Crawler makes reports when its finished its crawl.  Use this method
2361      * to get a String version of one of these files.
2362      * @param jobUid The unique ID for the job whose reports you want to see
2363      * (Must be a completed job).
2364      * @param reportName Name of report minus '.txt' (e.g. crawl-report).
2365      * @return String version of the on-disk report.
2366      * @throws IOException 
2367      */
2368     protected String getCrawlendReport(String jobUid, String reportName)
2369     throws IOException {
2370         CrawlJob job = getJobHandler().getJob(jobUid);
2371         if (job == null) {
2372             throw new IOException("No such job: " + jobUid);
2373         }
2374         File report = new File(job.getDirectory(), reportName + ".txt");
2375         if (!report.exists()) {
2376             throw new FileNotFoundException(report.getAbsolutePath());
2377         }
2378         return FileUtils.readFileAsString(report);
2379     }
2380     
2381     protected TabularData makeJobsTabularData(List jobs)
2382     throws OpenDataException {
2383         if (jobs == null || jobs.size() == 0) {
2384             return null;
2385         }
2386         TabularData td = new TabularDataSupport(this.jobsTabularType);
2387         for (Iterator i = jobs.iterator(); i.hasNext();) {
2388             CrawlJob job = (CrawlJob)i.next();
2389             CompositeData cd = new CompositeDataSupport(this.jobCompositeType,
2390                 JOB_KEYS,
2391                 new String [] {job.getUID(), job.getJobName(), job.getStatus()});
2392             td.put(cd);
2393         }
2394         return td;
2395     }
2396     
2397     /***
2398      * If passed str has placeholder for the empty string, return the empty
2399      * string else return orginal.
2400      * Dumb jmx clients can't pass empty string so they'll pass a representation
2401      * of empty string such as ' ' or '-'.  Convert such strings to empty
2402      * string.
2403      * @param str String to check.
2404      * @return Original <code>str</code> or empty string if <code>str</code>
2405      * contains a placeholder for the empty-string (e.g. '-', or ' ').
2406      */
2407     protected String checkForEmptyPlaceHolder(String str) {
2408         return TextUtils.matches("-| +", str)? "": str;
2409     }
2410 
2411     public MBeanInfo getMBeanInfo() {
2412         return this.openMBeanInfo;
2413     }
2414     
2415     /***
2416      * @return Name this instance registered in JMX (Only available after JMX
2417      * registration).
2418      */
2419     public ObjectName getMBeanName() {
2420         return this.mbeanName;
2421     }
2422 
2423     public ObjectName preRegister(MBeanServer server, ObjectName name)
2424     throws Exception {
2425         this.mbeanServer = server;
2426         @SuppressWarnings("unchecked")
2427         Hashtable<String,String> ht = name.getKeyPropertyList();
2428         if (!ht.containsKey(JmxUtils.NAME)) {
2429             throw new IllegalArgumentException("Name property required" +
2430                 name.getCanonicalName());
2431         }
2432         if (!ht.containsKey(JmxUtils.TYPE)) {
2433             ht.put(JmxUtils.TYPE, JmxUtils.SERVICE);
2434             name = new ObjectName(name.getDomain(), ht);
2435         }
2436         this.mbeanName = addGuiPort(addVitals(name));
2437         Heritrix.instances.put(this.mbeanName.
2438             getCanonicalKeyPropertyListString(), this);
2439         return this.mbeanName;
2440     }
2441     
2442     /***
2443      * Add vital stats to passed in ObjectName.
2444      * @param name ObjectName to add to.
2445      * @return name with host, guiport, and jmxport added.
2446      * @throws UnknownHostException
2447      * @throws MalformedObjectNameException
2448      * @throws NullPointerException
2449      */
2450     protected static ObjectName addVitals(ObjectName name)
2451     throws UnknownHostException, MalformedObjectNameException,
2452     NullPointerException {
2453         @SuppressWarnings("unchecked")
2454         Hashtable<String,String> ht = name.getKeyPropertyList();
2455         if (!ht.containsKey(JmxUtils.HOST)) {
2456             ht.put(JmxUtils.HOST, InetAddress.getLocalHost().getHostName());
2457             name = new ObjectName(name.getDomain(), ht);
2458         }
2459         if (!ht.containsKey(JmxUtils.JMX_PORT)) {
2460             // Add jdk jmx-port. This will be present if we've attached
2461             // ourselves to the jdk jmx agent.  Otherwise, we've been
2462             // deployed in a j2ee container with its own jmx agent.  In
2463             // this case we won't know how to get jmx port.
2464             String p = System.getProperty("com.sun.management.jmxremote.port");
2465             if (p != null && p.length() > 0) {
2466                 ht.put(JmxUtils.JMX_PORT, p);
2467                 name = new ObjectName(name.getDomain(), ht);
2468             }
2469         }
2470         return name;
2471     }
2472     
2473     protected static ObjectName addGuiPort(ObjectName name)
2474     throws MalformedObjectNameException, NullPointerException {
2475         @SuppressWarnings("unchecked")
2476         Hashtable<String,String> ht = name.getKeyPropertyList();
2477         if (!ht.containsKey(JmxUtils.GUI_PORT)) {
2478             // Add gui port if this instance was started with a gui.
2479             if (Heritrix.gui) {
2480                 ht.put(JmxUtils.GUI_PORT, Integer.toString(Heritrix.guiPort));
2481                 name = new ObjectName(name.getDomain(), ht);
2482             }
2483         }
2484         return name;
2485     }
2486 
2487     public void postRegister(Boolean registrationDone) {
2488         if (logger.isLoggable(Level.INFO)) {
2489             logger.info(
2490                 JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(),
2491                 this.mbeanServer, registrationDone.booleanValue()));
2492         }
2493         try {
2494             registerJndi(this.mbeanName);
2495         } catch (Exception e) {
2496             logger.log(Level.SEVERE, "Failed jndi registration", e);
2497         }
2498     }
2499 
2500     public void preDeregister() throws Exception {
2501         deregisterJndi(this.mbeanName);
2502     }
2503 
2504     public void postDeregister() {
2505         Heritrix.instances.
2506             remove(this.mbeanName.getCanonicalKeyPropertyListString());
2507         if (logger.isLoggable(Level.INFO)) {
2508             logger.info(JmxUtils.getLogUnregistrationMsg(
2509                     this.mbeanName.getCanonicalName(), this.mbeanServer));
2510         }
2511     }
2512     
2513     protected static void registerContainerJndi()
2514     throws MalformedObjectNameException, NullPointerException,
2515     		UnknownHostException, NamingException {
2516     	registerJndi(getJndiContainerName());
2517     }
2518 
2519     protected static void registerJndi(final ObjectName name)
2520     throws NullPointerException, NamingException {
2521     	Context c = getJndiContext();
2522     	if (c == null) {
2523     		return;
2524     	}
2525         CompoundName key = JndiUtils.bindObjectName(c, name);
2526         if (logger.isLoggable(Level.FINE)) {
2527             logger.fine("Bound '"  + key + "' to '" + JndiUtils.
2528                getCompoundName(c.getNameInNamespace()).toString()
2529                + "' jndi context");
2530         }
2531     }
2532     
2533     protected static void deregisterJndi(final ObjectName name)
2534     throws NullPointerException, NamingException {
2535     	Context c = getJndiContext();
2536     	if (c == null) {
2537     		return;
2538     	}
2539         CompoundName key = JndiUtils.unbindObjectName(c, name);
2540         if (logger.isLoggable(Level.FINE)) {
2541             logger.fine("Unbound '" + key + "' from '" +
2542                 JndiUtils.getCompoundName(c.getNameInNamespace()).toString() +
2543                 	"' jndi context");
2544         }
2545     }
2546     
2547     /***
2548      * @return Jndi context for the crawler or null if none found.
2549      * @throws NamingException 
2550      */
2551     protected static Context getJndiContext() throws NamingException {
2552     	Context c = null;
2553     	try {
2554     		c = JndiUtils.getSubContext(CRAWLER_PACKAGE);
2555     	} catch (NoInitialContextException e) {
2556     		logger.fine("No JNDI Context: " + e.toString());
2557     	}
2558     	return c;
2559     }
2560     
2561     /***
2562      * @return Jndi container name -- the name to use for the 'container' that
2563      * can host zero or more heritrix instances (Return a JMX ObjectName.  We
2564      * use ObjectName because then we're sync'd with JMX naming and ObjectName
2565      * has nice parsing).
2566      * @throws NullPointerException 
2567      * @throws MalformedObjectNameException 
2568      * @throws UnknownHostException 
2569      */
2570     protected static ObjectName getJndiContainerName()
2571     throws MalformedObjectNameException, NullPointerException,
2572     UnknownHostException {
2573         ObjectName objName = new ObjectName(CRAWLER_PACKAGE, "type",
2574             "container");
2575         return addVitals(objName);
2576     }
2577     
2578     /***
2579      * @return Return all registered instances of Heritrix (Rare are there 
2580      * more than one).
2581      */
2582     public static Map getInstances() {
2583         return Heritrix.instances;
2584     }
2585     
2586     /***
2587      * @return True if only one instance of Heritrix.
2588      */
2589     public static boolean isSingleInstance() {
2590         return Heritrix.instances != null && Heritrix.instances.size() == 1;
2591     }
2592     
2593     /***
2594      * @return Returns single instance or null if no instance or multiple.
2595      */
2596     public static Heritrix getSingleInstance() {
2597         return !isSingleInstance()?
2598             null:
2599             (Heritrix)Heritrix.instances.
2600                 get(Heritrix.instances.keySet().iterator().next());
2601     }
2602 }