1 package org.archive.crawler.postprocessor;
2
3 import java.util.logging.Level;
4 import java.util.logging.Logger;
5
6 import org.archive.crawler.datamodel.CrawlURI;
7 import org.archive.crawler.framework.Processor;
8 import org.archive.crawler.frontier.AdaptiveRevisitAttributeConstants;
9
10 /***
11 * Set a URI to be revisited by the ARFrontier. This only makes sense when using
12 * the ARFrontier and a decide-rule chain granting only selected access to this
13 * processor. This is the opposite of the RejectRevisitProcessor class.
14 *
15 * @author mzsanford
16 */
17 public class AcceptRevisitProcessor extends Processor implements
18 AdaptiveRevisitAttributeConstants {
19 private static final long serialVersionUID = 4310432303089418844L;
20
21 private static final Logger logger = Logger
22 .getLogger(AcceptRevisitProcessor.class.getName());
23
24 public AcceptRevisitProcessor(String name) {
25 super(name, "Set a URI to be revisited by the ARFrontier.");
26 }
27
28 @Override
29 protected void initialTasks() {
30 CrawlURI.addAlistPersistentMember(A_DISCARD_REVISIT);
31 }
32
33 @Override
34 protected void innerProcess(CrawlURI curi) throws InterruptedException {
35 if (curi != null && curi.containsKey(A_DISCARD_REVISIT)) {
36 if (logger.isLoggable(Level.FINE)) {
37 logger.fine("Removing DISCARD_REVISIT boolean from crawl URI: "
38 + curi.getUURI().toString());
39 }
40 curi.remove(A_DISCARD_REVISIT);
41 }
42 }
43
44 }