1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * CrawlController.java
20   * Created on May 14, 2003
21   *
22   * $Id: CrawlController.java 6145 2009-02-27 23:05:20Z gojomo $
23   */
24  package org.archive.crawler.framework;
25  
26  import java.io.File;
27  import java.io.FileOutputStream;
28  import java.io.FilenameFilter;
29  import java.io.IOException;
30  import java.io.ObjectInputStream;
31  import java.io.PrintWriter;
32  import java.io.Serializable;
33  import java.util.ArrayList;
34  import java.util.Arrays;
35  import java.util.Collections;
36  import java.util.EventObject;
37  import java.util.HashMap;
38  import java.util.HashSet;
39  import java.util.Hashtable;
40  import java.util.Iterator;
41  import java.util.LinkedList;
42  import java.util.List;
43  import java.util.Map;
44  import java.util.Set;
45  import java.util.TreeSet;
46  import java.util.concurrent.locks.ReentrantLock;
47  import java.util.logging.FileHandler;
48  import java.util.logging.Formatter;
49  import java.util.logging.Level;
50  import java.util.logging.Logger;
51  
52  import javax.management.AttributeNotFoundException;
53  import javax.management.InvalidAttributeValueException;
54  import javax.management.MBeanException;
55  import javax.management.ReflectionException;
56  
57  import org.apache.commons.httpclient.URIException;
58  import org.archive.crawler.admin.CrawlJob;
59  import org.archive.crawler.admin.StatisticsTracker;
60  import org.archive.crawler.datamodel.Checkpoint;
61  import org.archive.crawler.datamodel.CrawlOrder;
62  import org.archive.crawler.datamodel.CrawlURI;
63  import org.archive.crawler.datamodel.ServerCache;
64  import org.archive.crawler.event.CrawlStatusListener;
65  import org.archive.crawler.event.CrawlURIDispositionListener;
66  import org.archive.crawler.framework.exceptions.FatalConfigurationException;
67  import org.archive.crawler.framework.exceptions.InitializationException;
68  import org.archive.crawler.io.LocalErrorFormatter;
69  import org.archive.crawler.io.RuntimeErrorFormatter;
70  import org.archive.crawler.io.StatisticsLogFormatter;
71  import org.archive.crawler.io.UriErrorFormatter;
72  import org.archive.crawler.io.UriProcessingFormatter;
73  import org.archive.crawler.settings.MapType;
74  import org.archive.crawler.settings.SettingsHandler;
75  import org.archive.crawler.util.CheckpointUtils;
76  import org.archive.io.GenerationFileHandler;
77  import org.archive.net.UURI;
78  import org.archive.net.UURIFactory;
79  import org.archive.util.ArchiveUtils;
80  import org.archive.util.CachedBdbMap;
81  import org.archive.util.FileUtils;
82  import org.archive.util.Reporter;
83  import org.archive.util.bdbje.EnhancedEnvironment;
84  import org.xbill.DNS.DClass;
85  import org.xbill.DNS.Lookup;
86  
87  import com.sleepycat.bind.serial.StoredClassCatalog;
88  import com.sleepycat.je.CheckpointConfig;
89  import com.sleepycat.je.Database;
90  import com.sleepycat.je.DatabaseException;
91  import com.sleepycat.je.DbInternal;
92  import com.sleepycat.je.EnvironmentConfig;
93  import com.sleepycat.je.dbi.EnvironmentImpl;
94  import com.sleepycat.je.utilint.DbLsn;
95  
96  /***
97   * CrawlController collects all the classes which cooperate to
98   * perform a crawl and provides a high-level interface to the
99   * running crawl.
100  *
101  * As the "global context" for a crawl, subcomponents will
102  * often reach each other through the CrawlController.
103  *
104  * @author Gordon Mohr
105  */
106 public class CrawlController implements Serializable, Reporter {
107     // be robust against trivial implementation changes
108     private static final long serialVersionUID =
109         ArchiveUtils.classnameBasedUID(CrawlController.class,1);
110 
111     /***
112      * Messages from the crawlcontroller.
113      *
114      * They appear on console.
115      */
116     private final static Logger LOGGER =
117         Logger.getLogger(CrawlController.class.getName());
118 
119     // manifest support
120     /*** abbrieviation label for config files in manifest */
121     public static final char MANIFEST_CONFIG_FILE = 'C';
122     /*** abbrieviation label for report files in manifest */
123     public static final char MANIFEST_REPORT_FILE = 'R';
124     /*** abbrieviation label for log files in manifest */
125     public static final char MANIFEST_LOG_FILE = 'L';
126 
127     // key log names
128     private static final String LOGNAME_PROGRESS_STATISTICS =
129         "progress-statistics";
130     private static final String LOGNAME_URI_ERRORS = "uri-errors";
131     private static final String LOGNAME_RUNTIME_ERRORS = "runtime-errors";
132     private static final String LOGNAME_LOCAL_ERRORS = "local-errors";
133     private static final String LOGNAME_CRAWL = "crawl";
134 
135     // key subcomponents which define and implement a crawl in progress
136     private transient CrawlOrder order;
137     private transient CrawlScope scope;
138     private transient ProcessorChainList processorChains;
139     
140     private transient Frontier frontier;
141 
142     private transient ToePool toePool;
143     
144     private transient ServerCache serverCache;
145     
146     // This gets passed into the initialize method.
147     private transient SettingsHandler settingsHandler;
148 
149 
150     // Used to enable/disable single-threaded operation after OOM
151     private volatile transient boolean singleThreadMode = false; 
152     private transient ReentrantLock singleThreadLock = null;
153 
154     // emergency reserve of memory to allow some progress/reporting after OOM
155     private transient LinkedList<char[]> reserveMemory;
156     private static final int RESERVE_BLOCKS = 1;
157     private static final int RESERVE_BLOCK_SIZE = 6*2^20; // 6MB
158 
159     // crawl state: as requested or actual
160     
161     /***
162      * Crawl exit status.
163      */
164     private transient String sExit;
165 
166     private static final Object NASCENT = "NASCENT".intern();
167     private static final Object RUNNING = "RUNNING".intern();
168     private static final Object PAUSED = "PAUSED".intern();
169     private static final Object PAUSING = "PAUSING".intern();
170     private static final Object CHECKPOINTING = "CHECKPOINTING".intern();
171     private static final Object STOPPING = "STOPPING".intern();
172     private static final Object FINISHED = "FINISHED".intern();
173     private static final Object STARTED = "STARTED".intern();
174     private static final Object PREPARING = "PREPARING".intern();
175 
176     transient private Object state = NASCENT;
177 
178     // disk paths
179     private transient File disk;        // overall disk path
180     private transient File logsDisk;    // for log files
181     
182     /***
183      * For temp files representing state of crawler (eg queues)
184      */
185     private transient File stateDisk;
186     
187     /***
188      * For discardable temp files (eg fetch buffers).
189      */
190     private transient File scratchDisk;
191 
192     /***
193      * Directory that holds checkpoint.
194      */
195     private transient File checkpointsDisk;
196     
197     /***
198      * Checkpointer.
199      * Knows if checkpoint in progress and what name of checkpoint is.  Also runs
200      * checkpoints.
201      */
202     private Checkpointer checkpointer;
203     
204     /***
205      * Gets set to checkpoint we're in recovering if in checkpoint recover
206      * mode.  Gets setup by {@link #getCheckpointRecover()}.
207      */
208     private transient Checkpoint checkpointRecover = null;
209 
210     // crawl limits
211     private long maxBytes;
212     private long maxDocument;
213     private long maxTime;
214 
215     /***
216      * A manifest of all files used/created during this crawl. Written to file
217      * at the end of the crawl (the absolutely last thing done).
218      */
219     private StringBuffer manifest;
220 
221     /***
222      * Record of fileHandlers established for loggers,
223      * assisting file rotation.
224      */
225     transient private Map<Logger,FileHandler> fileHandlers;
226 
227     /*** suffix to use on active logs */
228     public static final String CURRENT_LOG_SUFFIX = ".log";
229 
230     /***
231      * Crawl progress logger.
232      *
233      * No exceptions.  Logs summary result of each url processing.
234      */
235     public transient Logger uriProcessing;
236 
237     /***
238      * This logger contains unexpected runtime errors.
239      *
240      * Would contain errors trying to set up a job or failures inside
241      * processors that they are not prepared to recover from.
242      */
243     public transient Logger runtimeErrors;
244 
245     /***
246      * This logger is for job-scoped logging, specifically errors which
247      * happen and are handled within a particular processor.
248      *
249      * Examples would be socket timeouts, exceptions thrown by extractors, etc.
250      */
251     public transient Logger localErrors;
252 
253     /***
254      * Special log for URI format problems, wherever they may occur.
255      */
256     public transient Logger uriErrors;
257 
258     /***
259      * Statistics tracker writes here at regular intervals.
260      */
261     private transient Logger progressStats;
262 
263     /***
264      * Logger to hold job summary report.
265      *
266      * Large state reports made at infrequent intervals (e.g. job ending) go
267      * here.
268      */
269     public transient Logger reports;
270 
271     protected StatisticsTracking statistics = null;
272 
273     /***
274      * List of crawl status listeners.
275      *
276      * All iterations need to synchronize on this object if they're to avoid
277      * concurrent modification exceptions.
278      * See {@link java.util.Collections#synchronizedList(List)}.
279      */
280     private transient List<CrawlStatusListener> registeredCrawlStatusListeners =
281         Collections.synchronizedList(new ArrayList<CrawlStatusListener>());
282     
283     // Since there is a high probability that there will only ever by one
284     // CrawlURIDispositionListner we will use this while there is only one:
285     private transient CrawlURIDispositionListener
286         registeredCrawlURIDispositionListener;
287 
288     // And then switch to the array once there is more then one.
289      protected transient ArrayList<CrawlURIDispositionListener> 
290      registeredCrawlURIDispositionListeners;
291     
292     /*** Shared bdb Environment for Frontier subcomponents */
293     // TODO: investigate using multiple environments to split disk accesses
294     // across separate physical disks
295     private transient EnhancedEnvironment bdbEnvironment = null;
296     
297     /***
298      * Keep a list of all BigMap instance made -- shouldn't be many -- so that
299      * we can checkpoint.
300      */
301     private transient Map<String,CachedBdbMap<?,?>> bigmaps = null;
302     
303     /***
304      * Default constructor
305      */
306     public CrawlController() {
307         super();
308         // Defer most setup to initialize methods
309     }
310 
311     /***
312      * Starting from nothing, set up CrawlController and associated
313      * classes to be ready for a first crawl.
314      *
315      * @param sH Settings handler.
316      * @throws InitializationException
317      */
318     public void initialize(SettingsHandler sH)
319     throws InitializationException {
320         sendCrawlStateChangeEvent(PREPARING, CrawlJob.STATUS_PREPARING);
321  
322         this.singleThreadLock = new ReentrantLock();
323         this.settingsHandler = sH;
324         installThreadContextSettingsHandler();
325         this.order = settingsHandler.getOrder();
326         this.order.setController(this);
327         this.bigmaps = new Hashtable<String,CachedBdbMap<?,?>>();
328         sExit = "";
329         this.manifest = new StringBuffer();
330         String onFailMessage = "";
331         try {
332             onFailMessage = "You must set the User-Agent and From HTTP" +
333             " header values to acceptable strings. \n" +
334             " User-Agent: [software-name](+[info-url])[misc]\n" +
335             " From: [email-address]\n";
336             order.checkUserAgentAndFrom();
337 
338             onFailMessage = "Unable to setup disk";
339             if (disk == null) {
340                 setupDisk();
341             }
342 
343             onFailMessage = "Unable to create log file(s)";
344             setupLogs();
345             
346             // Figure if we're to do a checkpoint restore. If so, get the
347             // checkpointRecover instance and then put into place the old bdb
348             // log files. If any of the log files already exist in target state
349             // diretory, WE DO NOT OVERWRITE (Makes for faster recovery).
350             // CrawlController checkpoint recovery code manages restoration of
351             // the old StatisticsTracker, any BigMaps used by the Crawler and
352             // the moving of bdb log files into place only. Other objects
353             // interested in recovery need to ask if
354             // CrawlController#isCheckpointRecover is set to figure if in
355             // recovery and then take appropriate recovery action
356             // (These objects can call CrawlController#getCheckpointRecover
357             // to get the directory that might hold files/objects dropped
358             // checkpointing).  Such objects will need to use a technique other
359             // than object serialization restoring settings because they'll
360             // have already been constructed when comes time for object to ask
361             // if its to recover itself. See ARCWriterProcessor for example.
362             onFailMessage = "Unable to test/run checkpoint recover";
363             this.checkpointRecover = getCheckpointRecover();
364             if (this.checkpointRecover == null) {
365                 this.checkpointer =
366                     new Checkpointer(this, this.checkpointsDisk);
367             } else {
368                 setupCheckpointRecover();
369             }
370             
371             onFailMessage = "Unable to setup bdb environment.";
372             setupBdb();
373             
374             onFailMessage = "Unable to setup statistics";
375             setupStatTracking();
376             
377             onFailMessage = "Unable to setup crawl modules";
378             setupCrawlModules();
379         } catch (Exception e) {
380             String tmp = "On crawl: "
381                 + settingsHandler.getSettingsObject(null).getName() + " " +
382                 onFailMessage;
383             LOGGER.log(Level.SEVERE, tmp, e);
384             throw new InitializationException(tmp, e);
385         }
386 
387         // force creation of DNS Cache now -- avoids CacheCleaner in toe-threads group
388         // also cap size at 1 (we never wanta cached value; 0 is non-operative)
389         Lookup.getDefaultCache(DClass.IN).setMaxEntries(1);
390         //dns.getRecords("localhost", Type.A, DClass.IN);
391         
392         setupToePool();
393         setThresholds();
394         
395         reserveMemory = new LinkedList<char[]>();
396         for(int i = 1; i < RESERVE_BLOCKS; i++) {
397             reserveMemory.add(new char[RESERVE_BLOCK_SIZE]);
398         }
399     }
400 
401     /***
402      * Utility method to install this crawl's SettingsHandler into the 
403      * 'global' (for this thread) holder, so that any subsequent 
404      * deserialization operations in this thread can find it. 
405      * 
406      * @param sH
407      */
408     public void installThreadContextSettingsHandler() {
409         SettingsHandler.setThreadContextSettingsHandler(settingsHandler);
410     }
411     
412     /***
413      * Does setup of checkpoint recover.
414      * Copies bdb log files into state dir.
415      * @throws IOException
416      */
417     protected void setupCheckpointRecover()
418     throws IOException {
419         long started = System.currentTimeMillis();;
420         if (LOGGER.isLoggable(Level.FINE)) {
421             LOGGER.fine("Starting recovery setup -- copying into place " +
422                 "bdbje log files -- for checkpoint named " +
423                 this.checkpointRecover.getDisplayName());
424         }
425         // Mark context we're in a recovery.
426         this.checkpointer.recover(this);
427         this.progressStats.info("CHECKPOINT RECOVER " +
428             this.checkpointRecover.getDisplayName());
429         // Copy the bdb log files to the state dir so we don't damage
430         // old checkpoint.  If thousands of log files, can take
431         // tens of minutes (1000 logs takes ~5 minutes to java copy,
432         // dependent upon hardware).  If log file already exists over in the
433         // target state directory, we do not overwrite -- we assume the log
434         // file in the target same as one we'd copy from the checkpoint dir.
435         File bdbSubDir = CheckpointUtils.
436             getBdbSubDirectory(this.checkpointRecover.getDirectory());
437         List<IOException> errs = new ArrayList<IOException>();
438         FileUtils.copyFiles(bdbSubDir, CheckpointUtils.getJeLogsFilter(),
439             getStateDisk(), true, false, errs);
440         for (IOException ioe : errs) {
441             LOGGER.log(Level.SEVERE, "Problem copying checkpoint files: "
442                     +"checkpoint may be corrupt",ioe);
443         }
444         if (LOGGER.isLoggable(Level.INFO)) {
445             LOGGER.info("Finished recovery setup for checkpoint named " +
446                 this.checkpointRecover.getDisplayName() + " in " +
447                 (System.currentTimeMillis() - started) + "ms.");
448         }
449     }
450     
451     protected boolean getCheckpointCopyBdbjeLogs() {
452         return ((Boolean)this.order.getUncheckedAttribute(null,
453             CrawlOrder.ATTR_CHECKPOINT_COPY_BDBJE_LOGS)).booleanValue();
454     }
455     
456     private void setupBdb()
457     throws FatalConfigurationException, AttributeNotFoundException {
458         EnvironmentConfig envConfig = new EnvironmentConfig();
459         envConfig.setAllowCreate(true);
460         int bdbCachePercent = ((Integer)this.order.
461             getAttribute(null, CrawlOrder.ATTR_BDB_CACHE_PERCENT)).intValue();
462         if(bdbCachePercent > 0) {
463             // Operator has expressed a preference; override BDB default or 
464             // je.properties value
465             envConfig.setCachePercent(bdbCachePercent);
466         }
467         envConfig.setSharedCache(true);
468         envConfig.setLockTimeout(5000000); // 5 seconds
469         if (LOGGER.isLoggable(Level.FINEST)) {
470             envConfig.setConfigParam("java.util.logging.level", "SEVERE");
471             envConfig.setConfigParam("java.util.logging.level.evictor",
472                 "SEVERE");
473             envConfig.setConfigParam("java.util.logging.ConsoleHandler.on",
474                 "true");
475         }
476 
477         if (!getCheckpointCopyBdbjeLogs()) {
478             // If we are not copying files on checkpoint, then set bdbje to not
479             // remove its log files so that its possible to later assemble
480             // (manually) all needed to run a recovery using mix of current
481             // bdbje logs and those its marked for deletion.
482             envConfig.setConfigParam("je.cleaner.expunge", "false");
483         }
484                 
485         try {
486             this.bdbEnvironment = new EnhancedEnvironment(getStateDisk(), envConfig);
487             if (LOGGER.isLoggable(Level.FINE)) {
488                 // Write out the bdb configuration.
489                 envConfig = bdbEnvironment.getConfig();
490                 LOGGER.fine("BdbConfiguration: Cache percentage " +
491                     envConfig.getCachePercent() +
492                     ", cache size " + envConfig.getCacheSize());
493             }
494         } catch (DatabaseException e) {
495             e.printStackTrace();
496             throw new FatalConfigurationException(e.getMessage());
497         }
498     }
499     
500     /***
501      * @return the shared EnhancedEnvironment
502      */
503     public EnhancedEnvironment getBdbEnvironment() {
504         return this.bdbEnvironment;
505     }
506     
507     /***
508      * @deprecated use EnhancedEnvironment's getClassCatalog() instead
509      */
510     public StoredClassCatalog getClassCatalog() {
511         return this.bdbEnvironment.getClassCatalog();
512     }
513 
514     /***
515      * Register for CrawlStatus events.
516      *
517      * @param cl a class implementing the CrawlStatusListener interface
518      *
519      * @see CrawlStatusListener
520      */
521     public void addCrawlStatusListener(CrawlStatusListener cl) {
522         synchronized (this.registeredCrawlStatusListeners) {
523             this.registeredCrawlStatusListeners.add(cl);
524         }
525     }
526 
527     /***
528      * Register for CrawlURIDisposition events.
529      *
530      * @param cl a class implementing the CrawlURIDispostionListener interface
531      *
532      * @see CrawlURIDispositionListener
533      */
534     public void addCrawlURIDispositionListener(CrawlURIDispositionListener cl) {
535         registeredCrawlURIDispositionListener = null;
536         if (registeredCrawlURIDispositionListeners == null) {
537             // First listener;
538             registeredCrawlURIDispositionListener = cl;
539             //Only used for the first one while it is the only one.
540             registeredCrawlURIDispositionListeners 
541              = new ArrayList<CrawlURIDispositionListener>(1);
542             //We expect it to be very small.
543         }
544         registeredCrawlURIDispositionListeners.add(cl);
545     }
546 
547     /***
548      * Allows an external class to raise a CrawlURIDispostion
549      * crawledURISuccessful event that will be broadcast to all listeners that
550      * have registered with the CrawlController.
551      *
552      * @param curi - The CrawlURI that will be sent with the event notification.
553      *
554      * @see CrawlURIDispositionListener#crawledURISuccessful(CrawlURI)
555      */
556     public void fireCrawledURISuccessfulEvent(CrawlURI curi) {
557         if (registeredCrawlURIDispositionListener != null) {
558             // Then we'll just use that.
559             registeredCrawlURIDispositionListener.crawledURISuccessful(curi);
560         } else {
561             // Go through the list.
562             if (registeredCrawlURIDispositionListeners != null
563                 && registeredCrawlURIDispositionListeners.size() > 0) {
564                 Iterator it = registeredCrawlURIDispositionListeners.iterator();
565                 while (it.hasNext()) {
566                     (
567                         (CrawlURIDispositionListener) it
568                             .next())
569                             .crawledURISuccessful(
570                         curi);
571                 }
572             }
573         }
574     }
575 
576     /***
577      * Allows an external class to raise a CrawlURIDispostion
578      * crawledURINeedRetry event that will be broadcast to all listeners that
579      * have registered with the CrawlController.
580      *
581      * @param curi - The CrawlURI that will be sent with the event notification.
582      *
583      * @see CrawlURIDispositionListener#crawledURINeedRetry(CrawlURI)
584      */
585     public void fireCrawledURINeedRetryEvent(CrawlURI curi) {
586         if (registeredCrawlURIDispositionListener != null) {
587             // Then we'll just use that.
588             registeredCrawlURIDispositionListener.crawledURINeedRetry(curi);
589             return;
590         }
591         
592         // Go through the list.
593         if (registeredCrawlURIDispositionListeners != null
594                 && registeredCrawlURIDispositionListeners.size() > 0) {
595             for (Iterator i = registeredCrawlURIDispositionListeners.iterator();
596                     i.hasNext();) {
597                 ((CrawlURIDispositionListener)i.next()).crawledURINeedRetry(curi);
598             }
599         }
600     }
601 
602     /***
603      * Allows an external class to raise a CrawlURIDispostion
604      * crawledURIDisregard event that will be broadcast to all listeners that
605      * have registered with the CrawlController.
606      * 
607      * @param curi -
608      *            The CrawlURI that will be sent with the event notification.
609      * 
610      * @see CrawlURIDispositionListener#crawledURIDisregard(CrawlURI)
611      */
612     public void fireCrawledURIDisregardEvent(CrawlURI curi) {
613         if (registeredCrawlURIDispositionListener != null) {
614             // Then we'll just use that.
615             registeredCrawlURIDispositionListener.crawledURIDisregard(curi);
616         } else {
617             // Go through the list.
618             if (registeredCrawlURIDispositionListeners != null
619                 && registeredCrawlURIDispositionListeners.size() > 0) {
620                 Iterator it = registeredCrawlURIDispositionListeners.iterator();
621                 while (it.hasNext()) {
622                     (
623                         (CrawlURIDispositionListener) it
624                             .next())
625                             .crawledURIDisregard(
626                         curi);
627                 }
628             }
629         }
630     }
631 
632     /***
633      * Allows an external class to raise a CrawlURIDispostion crawledURIFailure event
634      * that will be broadcast to all listeners that have registered with the CrawlController.
635      *
636      * @param curi - The CrawlURI that will be sent with the event notification.
637      *
638      * @see CrawlURIDispositionListener#crawledURIFailure(CrawlURI)
639      */
640     public void fireCrawledURIFailureEvent(CrawlURI curi) {
641         if (registeredCrawlURIDispositionListener != null) {
642             // Then we'll just use that.
643             registeredCrawlURIDispositionListener.crawledURIFailure(curi);
644         } else {
645             // Go through the list.
646             if (registeredCrawlURIDispositionListeners != null
647                 && registeredCrawlURIDispositionListeners.size() > 0) {
648                 Iterator it = registeredCrawlURIDispositionListeners.iterator();
649                 while (it.hasNext()) {
650                     ((CrawlURIDispositionListener)it.next())
651                         .crawledURIFailure(curi);
652                 }
653             }
654         }
655     }
656 
657     private void setupCrawlModules() throws FatalConfigurationException,
658              AttributeNotFoundException, MBeanException, ReflectionException {
659         if (scope == null) {
660             scope = (CrawlScope) order.getAttribute(CrawlScope.ATTR_NAME);
661             scope.initialize(this);
662         }
663         try {
664             this.serverCache = new ServerCache(this);
665         } catch (Exception e) {
666             throw new FatalConfigurationException("Unable to" +
667                " initialize frontier (Failed setup of ServerCache) " + e);
668         }
669         
670         if (this.frontier == null) {
671             this.frontier = (Frontier)order.getAttribute(Frontier.ATTR_NAME);
672             try {
673                 frontier.initialize(this);
674                 frontier.pause(); // Pause until begun
675                 // Run recovery if recoverPath points to a file (If it points
676                 // to a directory, its a checkpoint recovery).
677                 // TODO: make recover path relative to job root dir.
678                 if (!isCheckpointRecover()) {
679                     runFrontierRecover((String)order.
680                         getAttribute(CrawlOrder.ATTR_RECOVER_PATH));
681                 }
682             } catch (IOException e) {
683                 throw new FatalConfigurationException(
684                     "unable to initialize frontier: " + e);
685             }
686         }
687 
688         // Setup processors
689         if (processorChains == null) {
690             processorChains = new ProcessorChainList(order);
691         }
692     }
693     
694     protected void runFrontierRecover(String recoverPath)
695             throws AttributeNotFoundException, MBeanException,
696             ReflectionException, FatalConfigurationException {
697         if (recoverPath == null || recoverPath.length() <= 0) {
698             return;
699         }
700         File f = new File(recoverPath);
701         if (!f.exists()) {
702             LOGGER.severe("Recover file does not exist " + f.getAbsolutePath());
703             return;
704         }
705         if (!f.isFile()) {
706             // Its a directory if supposed to be doing a checkpoint recover.
707             return;
708         }
709         boolean retainFailures = ((Boolean)order.
710           getAttribute(CrawlOrder.ATTR_RECOVER_RETAIN_FAILURES)).booleanValue();
711         try {
712             frontier.importRecoverLog(f.getAbsolutePath(), retainFailures);
713         } catch (IOException e) {
714             e.printStackTrace();
715             throw (FatalConfigurationException) new FatalConfigurationException(
716                 "Recover.log " + recoverPath + " problem: " + e).initCause(e);
717         }
718     }
719 
720     private void setupDisk() throws AttributeNotFoundException {
721         String diskPath
722             = (String) order.getAttribute(null, CrawlOrder.ATTR_DISK_PATH);
723         this.disk = getSettingsHandler().
724             getPathRelativeToWorkingDirectory(diskPath);
725         this.disk.mkdirs();
726         this.logsDisk = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
727         this.checkpointsDisk = getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH);
728         this.stateDisk = getSettingsDir(CrawlOrder.ATTR_STATE_PATH);
729         this.scratchDisk = getSettingsDir(CrawlOrder.ATTR_SCRATCH_PATH);
730     }
731     
732     /***
733      * @return The logging directory or null if problem reading the settings.
734      */
735     public File getLogsDir() {
736         File f = null;
737         try {
738             f = getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
739         } catch (AttributeNotFoundException e) {
740             LOGGER.severe("Failed get of logs directory: " + e.getMessage());
741         }
742         return f;
743     }
744     
745     /***
746      * Return fullpath to the directory named by <code>key</code>
747      * in settings.
748      * If directory does not exist, it and all intermediary dirs
749      * will be created.
750      * @param key Key to use going to settings.
751      * @return Full path to directory named by <code>key</code>.
752      * @throws AttributeNotFoundException
753      */
754     public File getSettingsDir(String key)
755     throws AttributeNotFoundException {
756         String path = (String)order.getAttribute(null, key);
757         File f = new File(path);
758         if (!f.isAbsolute()) {
759             f = new File(disk.getPath(), path);
760         }
761         if (!f.exists()) {
762             f.mkdirs();
763         }
764         return f;
765     }
766 
767     /***
768      * Setup the statistics tracker.
769      * The statistics object must be created before modules can use it.
770      * Do it here now so that when modules retrieve the object from the
771      * controller during initialization (which some do), its in place.
772      * @throws InvalidAttributeValueException
773      * @throws FatalConfigurationException
774      */
775     private void setupStatTracking()
776     throws InvalidAttributeValueException, FatalConfigurationException {
777         MapType loggers = order.getLoggers();
778         final String cstName = "crawl-statistics";
779         if (loggers.isEmpty(null)) {
780             if (!isCheckpointRecover() && this.statistics == null) {
781                 this.statistics = new StatisticsTracker(cstName);
782             }
783             loggers.addElement(null, (StatisticsTracker)this.statistics);
784         }
785         
786         if (isCheckpointRecover()) {
787             restoreStatisticsTracker(loggers, cstName);
788         }
789 
790         for (Iterator it = loggers.iterator(null); it.hasNext();) {
791             StatisticsTracking tracker = (StatisticsTracking)it.next();
792             tracker.initialize(this);
793             if (this.statistics == null) {
794                 this.statistics = tracker;
795             }
796         }
797     }
798     
799     protected void restoreStatisticsTracker(MapType loggers,
800         String replaceName)
801     throws FatalConfigurationException {
802         try {
803             // Add the deserialized statstracker to the settings system.
804             loggers.removeElement(loggers.globalSettings(), replaceName);
805             loggers.addElement(loggers.globalSettings(),
806                 (StatisticsTracker)this.statistics);
807          } catch (Exception e) {
808              throw convertToFatalConfigurationException(e);
809          }
810     }
811     
812     protected FatalConfigurationException
813             convertToFatalConfigurationException(Exception e) {
814         FatalConfigurationException fce =
815             new FatalConfigurationException("Converted exception: " +
816                e.getMessage());
817         fce.setStackTrace(e.getStackTrace());
818         return fce;
819     }
820 
821     private void setupLogs() throws IOException {
822         String logsPath = logsDisk.getAbsolutePath() + File.separatorChar;
823         uriProcessing = Logger.getLogger(LOGNAME_CRAWL + "." + logsPath);
824         runtimeErrors = Logger.getLogger(LOGNAME_RUNTIME_ERRORS + "." +
825             logsPath);
826         localErrors = Logger.getLogger(LOGNAME_LOCAL_ERRORS + "." + logsPath);
827         uriErrors = Logger.getLogger(LOGNAME_URI_ERRORS + "." + logsPath);
828         progressStats = Logger.getLogger(LOGNAME_PROGRESS_STATISTICS + "." +
829             logsPath);
830 
831         this.fileHandlers = new HashMap<Logger,FileHandler>();
832 
833         setupLogFile(uriProcessing,
834             logsPath + LOGNAME_CRAWL + CURRENT_LOG_SUFFIX,
835             new UriProcessingFormatter(), true);
836 
837         setupLogFile(runtimeErrors,
838             logsPath + LOGNAME_RUNTIME_ERRORS + CURRENT_LOG_SUFFIX,
839             new RuntimeErrorFormatter(), true);
840 
841         setupLogFile(localErrors,
842             logsPath + LOGNAME_LOCAL_ERRORS + CURRENT_LOG_SUFFIX,
843             new LocalErrorFormatter(), true);
844 
845         setupLogFile(uriErrors,
846             logsPath + LOGNAME_URI_ERRORS + CURRENT_LOG_SUFFIX,
847             new UriErrorFormatter(), true);
848 
849         setupLogFile(progressStats,
850             logsPath + LOGNAME_PROGRESS_STATISTICS + CURRENT_LOG_SUFFIX,
851             new StatisticsLogFormatter(), true);
852 
853     }
854 
855     private void setupLogFile(Logger logger, String filename, Formatter f,
856             boolean shouldManifest) throws IOException, SecurityException {
857         GenerationFileHandler fh = new GenerationFileHandler(filename, true,
858             shouldManifest);
859         fh.setFormatter(f);
860         logger.addHandler(fh);
861         addToManifest(filename, MANIFEST_LOG_FILE, shouldManifest);
862         logger.setUseParentHandlers(false);
863         this.fileHandlers.put(logger, fh);
864     }
865     
866     protected void rotateLogFiles(String generationSuffix)
867     throws IOException {
868         if (this.state != PAUSED && this.state != CHECKPOINTING) {
869             throw new IllegalStateException("Pause crawl before requesting " +
870                 "log rotation.");
871         }
872         for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {
873             Logger l = (Logger)i.next();
874             GenerationFileHandler gfh =
875                 (GenerationFileHandler)fileHandlers.get(l);
876             GenerationFileHandler newGfh =
877                 gfh.rotate(generationSuffix, CURRENT_LOG_SUFFIX);
878             if (gfh.shouldManifest()) {
879                 addToManifest((String) newGfh.getFilenameSeries().get(1),
880                     MANIFEST_LOG_FILE, newGfh.shouldManifest());
881             }
882             l.removeHandler(gfh);
883             l.addHandler(newGfh);
884             fileHandlers.put(l, newGfh);
885         }
886     }
887 
888     /***
889      * Close all log files and remove handlers from loggers.
890      */
891     public void closeLogFiles() {
892        for (Iterator i = fileHandlers.keySet().iterator(); i.hasNext();) {
893             Logger l = (Logger)i.next();
894             GenerationFileHandler gfh =
895                 (GenerationFileHandler)fileHandlers.get(l);
896             gfh.close();
897             l.removeHandler(gfh);
898         }
899     }
900 
901     /***
902      * Sets the values for max bytes, docs and time based on crawl order. 
903      */
904     private void setThresholds() {
905         try {
906             maxBytes =
907                 ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_BYTES_DOWNLOAD))
908                     .longValue();
909         } catch (Exception e) {
910             maxBytes = 0;
911         }
912         try {
913             maxDocument =
914                 ((Long) order
915                     .getAttribute(CrawlOrder.ATTR_MAX_DOCUMENT_DOWNLOAD))
916                     .longValue();
917         } catch (Exception e) {
918             maxDocument = 0;
919         }
920         try {
921             maxTime =
922                 ((Long) order.getAttribute(CrawlOrder.ATTR_MAX_TIME_SEC))
923                     .longValue();
924         } catch (Exception e) {
925             maxTime = 0;
926         }
927     }
928 
929     /***
930      * @return Object this controller is using to track crawl statistics
931      */
932     public StatisticsTracking getStatistics() {
933         return statistics==null ?
934             new StatisticsTracker("crawl-statistics"): this.statistics;
935     }
936     
937     /***
938      * Send crawl change event to all listeners.
939      * @param newState State change we're to tell listeners' about.
940      * @param message Message on state change.
941      * @see #sendCheckpointEvent(File) for special case event sending
942      * telling listeners to checkpoint.
943      */
944     protected void sendCrawlStateChangeEvent(Object newState, String message) {
945         synchronized (this.registeredCrawlStatusListeners) {
946             this.state = newState;
947             for (Iterator i = this.registeredCrawlStatusListeners.iterator();
948                     i.hasNext();) {
949                 CrawlStatusListener l = (CrawlStatusListener)i.next();
950                 if (newState.equals(PAUSED)) {
951                    l.crawlPaused(message);
952                 } else if (newState.equals(RUNNING)) {
953                     l.crawlResuming(message);
954                 } else if (newState.equals(PAUSING)) {
955                    l.crawlPausing(message);
956                 } else if (newState.equals(STARTED)) {
957                     l.crawlStarted(message);
958                 } else if (newState.equals(STOPPING)) {
959                     l.crawlEnding(message);
960                 } else if (newState.equals(FINISHED)) {
961                     l.crawlEnded(message);
962                 } else if (newState.equals(PREPARING)) {
963                     l.crawlResuming(message);
964                 } else {
965                     throw new RuntimeException("Unknown state: " + newState);
966                 }
967                 if (LOGGER.isLoggable(Level.FINE)) {
968                     LOGGER.fine("Sent " + newState + " to " + l);
969                 }
970             }
971             LOGGER.fine("Sent " + newState);
972         }
973     }
974     
975     /***
976      * Send the checkpoint event.
977      * Has its own method apart from
978      * {@link #sendCrawlStateChangeEvent(Object, String)} because checkpointing
979      * throws an Exception (Didn't want to have to wrap all of the
980      * sendCrawlStateChangeEvent in try/catches).
981      * @param checkpointDir Where to write checkpoint state to.
982      * @throws Exception
983      */
984     protected void sendCheckpointEvent(File checkpointDir) throws Exception {
985         synchronized (this.registeredCrawlStatusListeners) {
986             if (this.state != PAUSED) {
987                 throw new IllegalStateException("Crawler must be completly " +
988                     "paused before checkpointing can start");
989             }
990             this.state = CHECKPOINTING;
991             for (Iterator i = this.registeredCrawlStatusListeners.iterator();
992                     i.hasNext();) {
993                 CrawlStatusListener l = (CrawlStatusListener)i.next();
994                 l.crawlCheckpoint(checkpointDir);
995                 if (LOGGER.isLoggable(Level.FINE)) {
996                     LOGGER.fine("Sent " + CHECKPOINTING + " to " + l);
997                 }
998             }
999             LOGGER.fine("Sent " + CHECKPOINTING);
1000         }
1001     }
1002 
1003     /***
1004      * Operator requested crawl begin
1005      */
1006     public void requestCrawlStart() {
1007         runProcessorInitialTasks();
1008 
1009         sendCrawlStateChangeEvent(STARTED, CrawlJob.STATUS_PENDING);
1010         String jobState;
1011         state = RUNNING;
1012         jobState = CrawlJob.STATUS_RUNNING;
1013         sendCrawlStateChangeEvent(this.state, jobState);
1014 
1015         // A proper exit will change this value.
1016         this.sExit = CrawlJob.STATUS_FINISHED_ABNORMAL;
1017         
1018         Thread statLogger = new Thread(statistics);
1019         statLogger.setName("StatLogger");
1020         statLogger.start();
1021         
1022         frontier.start();
1023     }
1024 
1025     /***
1026      * Called when the last toethread exits.
1027      */
1028     protected void completeStop() {
1029         LOGGER.fine("Entered complete stop.");
1030         // Run processors' final tasks
1031         runProcessorFinalTasks();
1032         // Ok, now we are ready to exit.
1033         sendCrawlStateChangeEvent(FINISHED, this.sExit);
1034         synchronized (this.registeredCrawlStatusListeners) {
1035             // Remove all listeners now we're done with them.
1036             this.registeredCrawlStatusListeners.
1037                 removeAll(this.registeredCrawlStatusListeners);
1038             this.registeredCrawlStatusListeners = null;
1039         }
1040         
1041         closeLogFiles();
1042         
1043         // Release reference to logger file handler instances.
1044         this.fileHandlers = null;
1045         this.uriErrors = null;
1046         this.uriProcessing = null;
1047         this.localErrors = null;
1048         this.runtimeErrors = null;
1049         this.progressStats = null;
1050         this.reports = null;
1051         this.manifest = null;
1052 
1053         // Do cleanup.
1054         this.statistics = null;
1055         this.frontier = null;
1056         this.disk = null;
1057         this.scratchDisk = null;
1058         this.order = null;
1059         this.scope = null;
1060         if (this.settingsHandler !=  null) {
1061             this.settingsHandler.cleanup();
1062         }
1063         this.settingsHandler = null;
1064         this.reserveMemory = null;
1065         this.processorChains = null;
1066         if (this.serverCache != null) {
1067             this.serverCache.cleanup();
1068             this.serverCache = null;
1069         }
1070         if (this.checkpointer != null) {
1071             this.checkpointer.cleanup();
1072             this.checkpointer = null;
1073         }
1074         if (this.bdbEnvironment != null) {
1075             try {
1076                 this.bdbEnvironment.sync();
1077                 this.bdbEnvironment.close();
1078             } catch (DatabaseException e) {
1079                 e.printStackTrace();
1080             }
1081             this.bdbEnvironment = null;
1082         }
1083         this.bigmaps = null;
1084         if (this.toePool != null) {
1085             this.toePool.cleanup();
1086             // I played with launching a thread here to do cleanup of the
1087             // ToePool ThreadGroup (making sure the cleanup thread was not
1088             // in the ToePool ThreadGroup).  Did this because ToePools seemed
1089             // to be sticking around holding references to CrawlController at
1090             // least.  Need to spend more time looking to see that this is
1091             // still the case even after adding the above toePool#cleanup call.
1092         }
1093         this.toePool = null;
1094         LOGGER.fine("Finished crawl.");
1095     }
1096     
1097     synchronized void completePause() {
1098         // Send a notifyAll. At least checkpointing thread may be waiting on a
1099         // complete pause.
1100         notifyAll();
1101         sendCrawlStateChangeEvent(PAUSED, CrawlJob.STATUS_PAUSED);
1102     }
1103 
1104     private boolean shouldContinueCrawling() {
1105         if (frontier.isEmpty()) {
1106             this.sExit = CrawlJob.STATUS_FINISHED;
1107             return false;
1108         }
1109 
1110         if (maxBytes > 0 && frontier.totalBytesWritten() >= maxBytes) {
1111             // Hit the max byte download limit!
1112             sExit = CrawlJob.STATUS_FINISHED_DATA_LIMIT;
1113             return false;
1114         } else if (maxDocument > 0
1115                 && frontier.succeededFetchCount() >= maxDocument) {
1116             // Hit the max document download limit!
1117             this.sExit = CrawlJob.STATUS_FINISHED_DOCUMENT_LIMIT;
1118             return false;
1119         } else if (maxTime > 0 &&
1120                 statistics.crawlDuration() >= maxTime * 1000) {
1121             // Hit the max byte download limit!
1122             this.sExit = CrawlJob.STATUS_FINISHED_TIME_LIMIT;
1123             return false;
1124         }
1125         return state == RUNNING;
1126     }
1127 
1128     /***
1129      * Request a checkpoint.
1130      * Sets a checkpointing thread running.
1131      * @throws IllegalStateException Thrown if crawl is not in paused state
1132      * (Crawl must be first paused before checkpointing).
1133      */
1134     public synchronized void requestCrawlCheckpoint()
1135     throws IllegalStateException {
1136         if (this.checkpointer == null) {
1137             return;
1138         }
1139         if (this.checkpointer.isCheckpointing()) {
1140             throw new IllegalStateException("Checkpoint already running.");
1141         }
1142         this.checkpointer.checkpoint();
1143     }   
1144     
1145     /***
1146      * @return True if checkpointing.
1147      */
1148     public boolean isCheckpointing() {
1149         return this.state == CHECKPOINTING;
1150     }
1151     
1152     /***
1153      * Run checkpointing.
1154      * CrawlController takes care of managing the checkpointing/serializing
1155      * of bdb, the StatisticsTracker, and the CheckpointContext.  Other
1156      * modules that want to revive themselves on checkpoint recovery need to
1157      * save state during their {@link CrawlStatusListener#crawlCheckpoint(File)}
1158      * invocation and then in their #initialize if a module,
1159      * or in their #initialTask if a processor, check with the CrawlController
1160      * if its checkpoint recovery. If it is, read in their old state from the
1161      * pointed to  checkpoint directory.
1162      * <p>Default access only to be called by Checkpointer.
1163      * @throws Exception
1164      */
1165     void checkpoint()
1166     throws Exception {
1167         // Tell registered listeners to checkpoint.
1168         sendCheckpointEvent(this.checkpointer.
1169             getCheckpointInProgressDirectory());
1170         
1171         // Rotate off crawler logs.
1172         LOGGER.fine("Rotating log files.");
1173         rotateLogFiles(CURRENT_LOG_SUFFIX + "." +
1174             this.checkpointer.getNextCheckpointName());
1175 
1176         // Sync the BigMap contents to bdb, if their bdb bigmaps.
1177         LOGGER.fine("BigMaps.");
1178         checkpointBigMaps(this.checkpointer.getCheckpointInProgressDirectory());
1179 
1180         // Note, on deserialization, the super CrawlType#parent
1181         // needs to be restored. Parent is '/crawl-order/loggers'.
1182         // The settings handler for this module also needs to be
1183         // restored. Both of these fields are private in the
1184         // super class. Adding the restored ST to crawl order should take
1185         // care of this.
1186 
1187         // Checkpoint bdb environment.
1188         LOGGER.fine("Bdb environment.");
1189         checkpointBdb(this.checkpointer.getCheckpointInProgressDirectory());
1190 
1191         // Make copy of order, seeds, and settings.
1192         LOGGER.fine("Copying settings.");
1193         copySettings(this.checkpointer.getCheckpointInProgressDirectory());
1194 
1195         // Checkpoint this crawlcontroller.
1196         CheckpointUtils.writeObjectToFile(this,
1197             this.checkpointer.getCheckpointInProgressDirectory());
1198     }
1199     
1200     /***
1201      * Copy off the settings.
1202      * @param checkpointDir Directory to write checkpoint to.
1203      * @throws IOException 
1204      */
1205     protected void copySettings(final File checkpointDir) throws IOException {
1206         final List files = this.settingsHandler.getListOfAllFiles();
1207         boolean copiedSettingsDir = false;
1208         final File settingsDir = new File(this.disk, "settings");
1209         for (final Iterator i = files.iterator(); i.hasNext();) {
1210             File f = new File((String)i.next());
1211             if (f.getAbsolutePath().startsWith(settingsDir.getAbsolutePath())) {
1212                 if (copiedSettingsDir) {
1213                     // Skip.  We've already copied this member of the
1214                     // settings directory.
1215                     continue;
1216                 }
1217                 // Copy 'settings' dir all in one lump, not a file at a time.
1218                 copiedSettingsDir = true;
1219                 FileUtils.copyFiles(settingsDir,
1220                     new File(checkpointDir, settingsDir.getName()));
1221                 continue;
1222             }
1223             FileUtils.copyFiles(f, f.isDirectory()? checkpointDir:
1224                 new File(checkpointDir, f.getName()));
1225         }
1226     }
1227     
1228     /***
1229      * Checkpoint bdb.
1230      * I used do a call to log cleaning as suggested in je-2.0 javadoc but takes
1231      * way too much time (20minutes for a crawl of 1million items). Assume
1232      * cleaner is keeping up. Below was log cleaning loop .
1233      * <pre>int totalCleaned = 0;
1234      * for (int cleaned = 0; (cleaned = this.bdbEnvironment.cleanLog()) != 0;
1235      *  totalCleaned += cleaned) {
1236      *      LOGGER.fine("Cleaned " + cleaned + " log files.");
1237      * }
1238      * </pre>
1239      * <p>I also used to do a sync. But, from Mark Hayes, sync and checkpoint
1240      * are effectively same thing only sync is not configurable.  He suggests
1241      * doing one or the other:
1242      * <p>MS: Reading code, Environment.sync() is a checkpoint.  Looks like
1243      * I don't need to call a checkpoint after calling a sync?
1244      * <p>MH: Right, they're almost the same thing -- just do one or the other,
1245      * not both.  With the new API, you'll need to do a checkpoint not a
1246      * sync, because the sync() method has no config parameter.  Don't worry
1247      * -- it's fine to do a checkpoint even though you're not using.
1248      * @param checkpointDir Directory to write checkpoint to.
1249      * @throws DatabaseException 
1250      * @throws IOException 
1251      * @throws RuntimeException Thrown if failed setup of new bdb environment.
1252      */
1253     protected void checkpointBdb(File checkpointDir)
1254     throws DatabaseException, IOException, RuntimeException {
1255         EnvironmentConfig envConfig = this.bdbEnvironment.getConfig();
1256         final List bkgrdThreads = Arrays.asList(new String []
1257             {"je.env.runCheckpointer", "je.env.runCleaner",
1258                 "je.env.runINCompressor"});
1259         try {
1260             // Disable background threads
1261             setBdbjeBkgrdThreads(envConfig, bkgrdThreads, "false");
1262             // Do a force checkpoint.  Thats what a sync does (i.e. doSync).
1263             CheckpointConfig chkptConfig = new CheckpointConfig();
1264             chkptConfig.setForce(true);
1265             
1266             // Mark Hayes of sleepycat says:
1267             // "The default for this property is false, which gives the current
1268             // behavior (allow deltas).  If this property is true, deltas are
1269             // prohibited -- full versions of internal nodes are always logged
1270             // during the checkpoint. When a full version of an internal node
1271             // is logged during a checkpoint, recovery does not need to process
1272             // it at all.  It is only fetched if needed by the application,
1273             // during normal DB operations after recovery. When a delta of an
1274             // internal node is logged during a checkpoint, recovery must
1275             // process it by fetching the full version of the node from earlier
1276             // in the log, and then applying the delta to it.  This can be
1277             // pretty slow, since it is potentially a large amount of
1278             // random I/O."
1279             chkptConfig.setMinimizeRecoveryTime(true);
1280             this.bdbEnvironment.checkpoint(chkptConfig);
1281             LOGGER.fine("Finished bdb checkpoint.");
1282             
1283             // From the sleepycat folks: A trick for flipping db logs.
1284             EnvironmentImpl envImpl = 
1285                 DbInternal.envGetEnvironmentImpl(this.bdbEnvironment);
1286             long firstFileInNextSet =
1287                 DbLsn.getFileNumber(envImpl.forceLogFileFlip());
1288             // So the last file in the checkpoint is firstFileInNextSet - 1.
1289             // Write manifest of all log files into the bdb directory.
1290             final String lastBdbCheckpointLog =
1291                 getBdbLogFileName(firstFileInNextSet - 1);
1292             processBdbLogs(checkpointDir, lastBdbCheckpointLog);
1293             LOGGER.fine("Finished processing bdb log files.");
1294         } finally {
1295             // Restore background threads.
1296             setBdbjeBkgrdThreads(envConfig, bkgrdThreads, "true");
1297         }
1298     }
1299     
1300     protected void processBdbLogs(final File checkpointDir,
1301             final String lastBdbCheckpointLog) throws IOException {
1302         File bdbDir = CheckpointUtils.getBdbSubDirectory(checkpointDir);
1303         if (!bdbDir.exists()) {
1304             bdbDir.mkdir();
1305         }
1306         PrintWriter pw = new PrintWriter(new FileOutputStream(new File(
1307              checkpointDir, "bdbje-logs-manifest.txt")));
1308         try {
1309             // Don't copy any beyond the last bdb log file (bdbje can keep
1310             // writing logs after checkpoint).
1311             boolean pastLastLogFile = false;
1312             Set<String> srcFilenames = null;
1313             final boolean copyFiles = getCheckpointCopyBdbjeLogs();
1314             do {
1315                 FilenameFilter filter = CheckpointUtils.getJeLogsFilter();
1316                 srcFilenames =
1317                     new HashSet<String>(Arrays.asList(
1318                             getStateDisk().list(filter)));
1319                 List tgtFilenames = Arrays.asList(bdbDir.list(filter));
1320                 if (tgtFilenames != null && tgtFilenames.size() > 0) {
1321                     srcFilenames.removeAll(tgtFilenames);
1322                 }
1323                 if (srcFilenames.size() > 0) {
1324                     // Sort files.
1325                     srcFilenames = new TreeSet<String>(srcFilenames);
1326                     int count = 0;
1327                     for (final Iterator i = srcFilenames.iterator();
1328                             i.hasNext() && !pastLastLogFile;) {
1329                         String name = (String) i.next();
1330                         if (copyFiles) {
1331                             FileUtils.copyFiles(new File(getStateDisk(), name),
1332                                 new File(bdbDir, name));
1333                         }
1334                         pw.println(name);
1335                         if (name.equals(lastBdbCheckpointLog)) {
1336                             // We're done.
1337                             pastLastLogFile = true;
1338                         }
1339                         count++;
1340                     }
1341                     if (LOGGER.isLoggable(Level.FINE)) {
1342                         LOGGER.fine("Copied " + count);
1343                     }
1344                 }
1345             } while (!pastLastLogFile && srcFilenames != null &&
1346                 srcFilenames.size() > 0);
1347         } finally {
1348             pw.close();
1349         }
1350     }
1351  
1352     protected String getBdbLogFileName(final long index) {
1353         String lastBdbLogFileHex = Long.toHexString(index);
1354         StringBuffer buffer = new StringBuffer();
1355         for (int i = 0; i < (8 - lastBdbLogFileHex.length()); i++) {
1356             buffer.append('0');
1357         }
1358         buffer.append(lastBdbLogFileHex);
1359         buffer.append(".jdb");
1360         return buffer.toString();
1361     }
1362     
1363     protected void setBdbjeBkgrdThreads(final EnvironmentConfig config,
1364             final List threads, final String setting) {
1365         for (final Iterator i = threads.iterator(); i.hasNext();) {
1366             config.setConfigParam((String)i.next(), setting);
1367         }
1368     }
1369     
1370     /***
1371      * Get recover checkpoint.
1372      * Returns null if we're NOT in recover mode.
1373      * Looks at ATTR_RECOVER_PATH and if its a directory, assumes checkpoint
1374      * recover. If checkpoint mode, returns Checkpoint instance if
1375      * checkpoint was VALID (else null).
1376      * @return Checkpoint instance if we're in recover checkpoint
1377      * mode and the pointed-to checkpoint was valid.
1378      * @see #isCheckpointRecover()
1379      */
1380     public synchronized Checkpoint getCheckpointRecover() {
1381         if (this.checkpointRecover != null) {
1382             return this.checkpointRecover;
1383         }
1384         return getCheckpointRecover(this.order);
1385     }
1386     
1387     public static Checkpoint getCheckpointRecover(final CrawlOrder order) {
1388         String path = (String)order.getUncheckedAttribute(null,
1389             CrawlOrder.ATTR_RECOVER_PATH);
1390         if (path == null || path.length() <= 0) {
1391             return null;
1392         }
1393         File rp = new File(path);
1394         // Assume if path is to a directory, its a checkpoint recover.
1395         Checkpoint result = null;
1396         if (rp.exists() && rp.isDirectory()) {
1397             Checkpoint cp = new Checkpoint(rp);
1398             if (cp.isValid()) {
1399                 // if valid, set as result.
1400                 result = cp;
1401             }
1402         }
1403         return result;
1404     }
1405     
1406     public static boolean isCheckpointRecover(final CrawlOrder order) {
1407         return getCheckpointRecover(order) != null;
1408     }
1409     
1410     /***
1411      * @return True if we're in checkpoint recover mode. Call
1412      * {@link #getCheckpointRecover()} to get at Checkpoint instance
1413      * that has info on checkpoint directory being recovered from.
1414      */
1415     public boolean isCheckpointRecover() {
1416         return this.checkpointRecover != null;
1417     }
1418 
1419     /***
1420      * Operator requested for crawl to stop.
1421      */
1422     public synchronized void requestCrawlStop() {
1423         requestCrawlStop(CrawlJob.STATUS_ABORTED);
1424     }
1425     
1426     /***
1427      * Operator requested for crawl to stop.
1428      * @param message 
1429      */
1430     public synchronized void requestCrawlStop(String message) {
1431         if (state == STOPPING || state == FINISHED) {
1432             return;
1433         }
1434         if (message == null) {
1435             throw new IllegalArgumentException("Message cannot be null.");
1436         }
1437         this.sExit = message;
1438         beginCrawlStop();
1439     }
1440 
1441     /***
1442      * Start the process of stopping the crawl. 
1443      */
1444     public void beginCrawlStop() {
1445         LOGGER.fine("Started.");
1446         sendCrawlStateChangeEvent(STOPPING, this.sExit);
1447         if (this.frontier != null) {
1448             this.frontier.terminate();
1449             this.frontier.unpause();
1450         }
1451         LOGGER.fine("Finished."); 
1452     }
1453     
1454     /***
1455      * Stop the crawl temporarly.
1456      */
1457     public synchronized void requestCrawlPause() {
1458         if (state == PAUSING || state == PAUSED) {
1459             // Already about to pause
1460             return;
1461         }
1462         sExit = CrawlJob.STATUS_WAITING_FOR_PAUSE;
1463         frontier.pause();
1464         sendCrawlStateChangeEvent(PAUSING, this.sExit);
1465         if (toePool.getActiveToeCount() == 0) {
1466             // if all threads already held, complete pause now
1467             // (no chance to trigger off later held thread)
1468             completePause();
1469         }
1470     }
1471 
1472     /***
1473      * Tell if the controller is paused
1474      * @return true if paused
1475      */
1476     public boolean isPaused() {
1477         return state == PAUSED;
1478     }
1479     
1480     public boolean isPausing() {
1481         return state == PAUSING;
1482     }
1483     
1484     public boolean isRunning() {
1485         return state == RUNNING;
1486     }
1487 
1488     /***
1489      * Resume crawl from paused state
1490      */
1491     public synchronized void requestCrawlResume() {
1492         if (state != PAUSING && state != PAUSED && state != CHECKPOINTING) {
1493             // Can't resume if not been told to pause or if we're in middle of
1494             // a checkpoint.
1495             return;
1496         }
1497         multiThreadMode();
1498         frontier.unpause();
1499         LOGGER.fine("Crawl resumed.");
1500         sendCrawlStateChangeEvent(RUNNING, CrawlJob.STATUS_RUNNING);
1501     }
1502 
1503     /***
1504      * @return Active toe thread count.
1505      */
1506     public int getActiveToeCount() {
1507         if (toePool == null) {
1508             return 0;
1509         }
1510         return toePool.getActiveToeCount();
1511     }
1512 
1513     private void setupToePool() {
1514         toePool = new ToePool(this);
1515         // TODO: make # of toes self-optimizing
1516         toePool.setSize(order.getMaxToes());
1517     }
1518 
1519     /***
1520      * @return The order file instance.
1521      */
1522     public CrawlOrder getOrder() {
1523         return order;
1524     }
1525 
1526     /***
1527      * @return The server cache instance.
1528      */
1529     public ServerCache getServerCache() {
1530         return serverCache;
1531     }
1532 
1533     /***
1534      * @param o
1535      */
1536     public void setOrder(CrawlOrder o) {
1537         order = o;
1538     }
1539 
1540 
1541     /***
1542      * @return The frontier.
1543      */
1544     public Frontier getFrontier() {
1545         return frontier;
1546     }
1547 
1548     /***
1549      * @return This crawl scope.
1550      */
1551     public CrawlScope getScope() {
1552         return scope;
1553     }
1554 
1555     /*** Get the list of processor chains.
1556      *
1557      * @return the list of processor chains.
1558      */
1559     public ProcessorChainList getProcessorChainList() {
1560         return processorChains;
1561     }
1562 
1563     /*** Get the first processor chain.
1564      *
1565      * @return the first processor chain.
1566      */
1567     public ProcessorChain getFirstProcessorChain() {
1568         return processorChains.getFirstChain();
1569     }
1570 
1571     /*** Get the postprocessor chain.
1572      *
1573      * @return the postprocessor chain.
1574      */
1575     public ProcessorChain getPostprocessorChain() {
1576         return processorChains.getLastChain();
1577     }
1578 
1579     /***
1580      * Get the 'working' directory of the current crawl.
1581      * @return the 'working' directory of the current crawl.
1582      */
1583     public File getDisk() {
1584         return disk;
1585     }
1586 
1587     /***
1588      * @return Scratch disk location.
1589      */
1590     public File getScratchDisk() {
1591         return scratchDisk;
1592     }
1593 
1594     /***
1595      * @return State disk location.
1596      */
1597     public File getStateDisk() {
1598         return stateDisk;
1599     }
1600 
1601     /***
1602      * @return The number of ToeThreads
1603      *
1604      * @see ToePool#getToeCount()
1605      */
1606     public int getToeCount() {
1607         return this.toePool == null? 0: this.toePool.getToeCount();
1608     }
1609 
1610     /***
1611      * @return The ToePool
1612      */
1613     public ToePool getToePool() {
1614         return toePool;
1615     }
1616     
1617     /***
1618      * @return toepool one-line report
1619      */
1620     public String oneLineReportThreads() {
1621         // TODO Auto-generated method stub
1622         return toePool.singleLineReport();
1623     }
1624 
1625     /***
1626      * While many settings will update automatically when the SettingsHandler is
1627      * modified, some settings need to be explicitly changed to reflect new
1628      * settings. This includes, number of toe threads and seeds.
1629      */
1630     public void kickUpdate() {
1631         
1632         installThreadContextSettingsHandler();
1633  
1634         toePool.setSize(order.getMaxToes());
1635         
1636         this.scope.kickUpdate();
1637         this.frontier.kickUpdate();
1638         this.processorChains.kickUpdate();
1639         
1640         // TODO: continue to generalize this, so that any major 
1641         // component can get a kick when it may need to refresh its data
1642 
1643         setThresholds();
1644     }
1645 
1646     /***
1647      * @return The settings handler.
1648      */
1649     public SettingsHandler getSettingsHandler() {
1650         return settingsHandler;
1651     }
1652 
1653     /***
1654      * This method iterates through processor chains to run processors' initial
1655      * tasks.
1656      *
1657      */
1658     private void runProcessorInitialTasks(){
1659         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1660             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1661                     ip.hasNext(); ) {
1662                 ((Processor) ip.next()).initialTasks();
1663             }
1664         }
1665     }
1666 
1667     /***
1668      * This method iterates through processor chains to run processors' final
1669      * tasks.
1670      *
1671      */
1672     private void runProcessorFinalTasks(){
1673         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1674             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1675                     ip.hasNext(); ) {
1676                 ((Processor) ip.next()).finalTasks();
1677             }
1678         }
1679     }
1680 
1681     /***
1682      * Kills a thread. For details see
1683      * {@link org.archive.crawler.framework.ToePool#killThread(int, boolean)
1684      * ToePool.killThread(int, boolean)}.
1685      * @param threadNumber Thread to kill.
1686      * @param replace Should thread be replaced.
1687      * @see org.archive.crawler.framework.ToePool#killThread(int, boolean)
1688      */
1689     public void killThread(int threadNumber, boolean replace){
1690         toePool.killThread(threadNumber, replace);
1691     }
1692 
1693     /***
1694      * Add a file to the manifest of files used/generated by the current
1695      * crawl.
1696      * 
1697      * TODO: Its possible for a file to be added twice if reports are
1698      * force generated midcrawl.  Fix.
1699      *
1700      * @param file The filename (with absolute path) of the file to add
1701      * @param type The type of the file
1702      * @param bundle Should the file be included in a typical bundling of
1703      *           crawler files.
1704      *
1705      * @see #MANIFEST_CONFIG_FILE
1706      * @see #MANIFEST_LOG_FILE
1707      * @see #MANIFEST_REPORT_FILE
1708      */
1709     public void addToManifest(String file, char type, boolean bundle) {
1710         manifest.append(type + (bundle? "+": "-") + " " + file + "\n");
1711     }
1712 
1713     /***
1714      * Evaluate if the crawl should stop because it is finished.
1715      */
1716     public void checkFinish() {
1717         if(atFinish()) {
1718             beginCrawlStop();
1719         }
1720     }
1721 
1722     /***
1723      * Evaluate if the crawl should stop because it is finished,
1724      * without actually stopping the crawl.
1725      * 
1726      * @return true if crawl is at a finish-possible state
1727      */
1728     public boolean atFinish() {
1729         return state == RUNNING && !shouldContinueCrawling();
1730     }
1731     
1732     private void readObject(ObjectInputStream stream)
1733     throws IOException, ClassNotFoundException {
1734         stream.defaultReadObject();
1735         // Setup status listeners
1736         this.registeredCrawlStatusListeners =
1737             Collections.synchronizedList(new ArrayList<CrawlStatusListener>());
1738         // Ensure no holdover singleThreadMode
1739         singleThreadMode = false; 
1740     }
1741 
1742     /***
1743      * Go to single thread mode, where only one ToeThread may
1744      * proceed at a time. Also acquires the single lock, so 
1745      * no further threads will proceed past an 
1746      * acquireContinuePermission. Caller mush be sure to release
1747      * lock to allow other threads to proceed one at a time. 
1748      */
1749     public void singleThreadMode() {
1750         this.singleThreadLock.lock();
1751         singleThreadMode = true; 
1752     }
1753 
1754     /***
1755      * Go to back to regular multi thread mode, where all
1756      * ToeThreads may proceed at once
1757      */
1758     public void multiThreadMode() {
1759         this.singleThreadLock.lock();
1760         singleThreadMode = false; 
1761         while(this.singleThreadLock.isHeldByCurrentThread()) {
1762             this.singleThreadLock.unlock();
1763         }
1764     }
1765     
1766     /***
1767      * Proceed only if allowed, giving CrawlController a chance
1768      * to enforce single-thread mode.
1769      */
1770     public void acquireContinuePermission() {
1771         if (singleThreadMode) {
1772             this.singleThreadLock.lock();
1773             if(!singleThreadMode) {
1774                 // If changed while waiting, ignore
1775                 while(this.singleThreadLock.isHeldByCurrentThread()) {
1776                     this.singleThreadLock.unlock();
1777                 }
1778             }
1779         } // else, permission is automatic
1780     }
1781 
1782     /***
1783      * Relinquish continue permission at end of processing (allowing
1784      * another thread to proceed if in single-thread mode). 
1785      */
1786     public void releaseContinuePermission() {
1787         if (singleThreadMode) {
1788             while(this.singleThreadLock.isHeldByCurrentThread()) {
1789                 this.singleThreadLock.unlock();
1790             }
1791         } // else do nothing; 
1792     }
1793     
1794     public void freeReserveMemory() {
1795         if(!reserveMemory.isEmpty()) {
1796             reserveMemory.removeLast();
1797             System.gc();
1798         }
1799     }
1800 
1801     /***
1802      * Note that a ToeThread reached paused condition, possibly
1803      * completing the crawl-pause. 
1804      */
1805     public synchronized void toePaused() {
1806         releaseContinuePermission();
1807         if (state ==  PAUSING && toePool.getActiveToeCount() == 0) {
1808             completePause();
1809         }
1810     }
1811     
1812     /***
1813      * Note that a ToeThread ended, possibly completing the crawl-stop. 
1814      */
1815     public synchronized void toeEnded() {
1816         if (state == STOPPING && toePool.getActiveToeCount() == 0) {
1817             completeStop();
1818         }
1819     }
1820 
1821     /***
1822      * Add order file contents to manifest.
1823      * Write configuration files and any files managed by CrawlController to
1824      * it - files managed by other classes, excluding the settings framework,
1825      * are responsible for adding their files to the manifest themselves.
1826      * by calling addToManifest.
1827      * Call before writing out reports.
1828      */
1829     public void addOrderToManifest() {
1830         for (Iterator it = getSettingsHandler().getListOfAllFiles().iterator();
1831                 it.hasNext();) {
1832             addToManifest((String)it.next(),
1833                 CrawlController.MANIFEST_CONFIG_FILE, true);
1834         }
1835     }
1836     
1837     /***
1838      * Log a URIException from deep inside other components to the crawl's
1839      * shared log. 
1840      * 
1841      * @param e URIException encountered
1842      * @param u CrawlURI where problem occurred
1843      * @param l String which could not be interpreted as URI without exception
1844      */
1845     public void logUriError(URIException e, UURI u, CharSequence l) {
1846         if (e.getReasonCode() == UURIFactory.IGNORED_SCHEME) {
1847             // don't log those that are intentionally ignored
1848             return; 
1849         }
1850         Object[] array = {u, l};
1851         uriErrors.log(Level.INFO, e.getMessage(), array);
1852     }
1853     
1854     // 
1855     // Reporter
1856     //
1857     public final static String PROCESSORS_REPORT = "processors";
1858     public final static String MANIFEST_REPORT = "manifest";
1859     protected final static String[] REPORTS = {PROCESSORS_REPORT, MANIFEST_REPORT};
1860     
1861     /* (non-Javadoc)
1862      * @see org.archive.util.Reporter#getReports()
1863      */
1864     public String[] getReports() {
1865         return REPORTS;
1866     }
1867 
1868     /* (non-Javadoc)
1869      * @see org.archive.util.Reporter#reportTo(java.io.Writer)
1870      */
1871     public void reportTo(PrintWriter writer) {
1872         reportTo(null,writer);
1873     }
1874 
1875     public String singleLineReport() {
1876         return ArchiveUtils.singleLineReport(this);
1877     }
1878 
1879     public void reportTo(String name, PrintWriter writer) {
1880         if(PROCESSORS_REPORT.equals(name)) {
1881             reportProcessorsTo(writer);
1882             return;
1883         } else if (MANIFEST_REPORT.equals(name)) {
1884             reportManifestTo(writer);
1885             return;
1886         } else if (name!=null) {
1887             writer.println("requested report unknown: "+name);
1888         }
1889         singleLineReportTo(writer);
1890     }
1891 
1892     /***
1893      * @param writer Where to write report to.
1894      */
1895     protected void reportManifestTo(PrintWriter writer) {
1896         writer.print(manifest.toString());
1897     }
1898 
1899     /***
1900      * Compiles and returns a human readable report on the active processors.
1901      * @param writer Where to write to.
1902      * @see org.archive.crawler.framework.Processor#report()
1903      */
1904     protected void reportProcessorsTo(PrintWriter writer) {
1905         writer.print(
1906             "Processors report - "
1907                 + ArchiveUtils.get12DigitDate()
1908                 + "\n");
1909         writer.print("  Job being crawled:    " + getOrder().getCrawlOrderName()
1910                 + "\n");
1911 
1912         writer.print("  Number of Processors: " +
1913             processorChains.processorCount() + "\n");
1914         writer.print("  NOTE: Some processors may not return a report!\n\n");
1915 
1916         for (Iterator ic = processorChains.iterator(); ic.hasNext(); ) {
1917             for (Iterator ip = ((ProcessorChain) ic.next()).iterator();
1918                     ip.hasNext(); ) {
1919                 writer.print(((Processor) ip.next()).report());
1920             }
1921         }
1922     }
1923 
1924     public void singleLineReportTo(PrintWriter writer) {
1925         // TODO: imrpvoe to be summary of crawl state
1926         writer.write("[Crawl Controller]\n");
1927     }
1928 
1929     public String singleLineLegend() {
1930         // TODO improve
1931         return "nothingYet";
1932     }
1933     
1934     /***
1935      * Call this method to get instance of the crawler BigMap implementation.
1936      * A "BigMap" is a Map that knows how to manage ever-growing sets of
1937      * key/value pairs. If we're in a checkpoint recovery, this method will
1938      * manage reinstantiation of checkpointed bigmaps.
1939      * @param dbName Name to give any associated database.  Also used
1940      * as part of name serializing out bigmap.  Needs to be unique to a crawl.
1941      * @param keyClass Class of keys we'll be using.
1942      * @param valueClass Class of values we'll be using.
1943      * @return Map that knows how to carry large sets of key/value pairs or
1944      * if none available, returns instance of HashMap.
1945      * @throws Exception
1946      */
1947     public <K,V> Map<K,V> getBigMap(final String dbName, 
1948             final Class<? super K> keyClass,
1949             final Class<? super V> valueClass)
1950     throws Exception {
1951         CachedBdbMap<K,V> result = new CachedBdbMap<K,V>(dbName);
1952         if (isCheckpointRecover()) {
1953             File baseDir = getCheckpointRecover().getDirectory();
1954             @SuppressWarnings("unchecked")
1955             CachedBdbMap<K,V> temp = CheckpointUtils.
1956                 readObjectFromFile(result.getClass(), dbName, baseDir);
1957             result = temp;
1958         }
1959         result.initialize(getBdbEnvironment(), keyClass, valueClass,
1960                 getBdbEnvironment().getClassCatalog());
1961         // Save reference to all big maps made so can manage their
1962         // checkpointing.
1963         this.bigmaps.put(dbName, result);
1964         return result;
1965     }
1966     
1967     protected void checkpointBigMaps(final File cpDir)
1968     throws Exception {
1969         for (final Iterator i = this.bigmaps.keySet().iterator(); i.hasNext();) {
1970             Object key = i.next();
1971             Object obj = this.bigmaps.get(key);
1972             // TODO: I tried adding sync to custom serialization of BigMap
1973             // implementation but data member counts of the BigMap
1974             // implementation were not being persisted properly.  Look at
1975             // why.  For now, do sync in advance of serialization for now.
1976             ((CachedBdbMap)obj).sync();
1977             CheckpointUtils.writeObjectToFile(obj, (String)key, cpDir);
1978         }
1979     }
1980 
1981     /***
1982      * Called whenever progress statistics logging event.
1983      * @param e Progress statistics event.
1984      */
1985     public void progressStatisticsEvent(final EventObject e) {
1986         // Default is to do nothing.  Subclass if you want to catch this event.
1987         // Later, if demand, add publisher/listener support.  Currently hacked
1988         // in so the subclass in CrawlJob added to support JMX can send
1989         // notifications of progressStatistics change.
1990     }
1991     
1992     /***
1993      * Log to the progress statistics log.
1994      * @param msg Message to write the progress statistics log.
1995      */
1996     public void logProgressStatistics(final String msg) {
1997         this.progressStats.info(msg);
1998     }
1999 
2000     /***
2001      * @return CrawlController state.
2002      */
2003     public Object getState() {
2004         return this.state;
2005     }
2006 
2007     public File getCheckpointsDisk() {
2008         return this.checkpointsDisk;
2009     }
2010 }