1 package org.archive.crawler.postprocessor;
2
3 import java.util.logging.Level;
4 import java.util.logging.Logger;
5
6 import org.archive.crawler.datamodel.CrawlURI;
7 import org.archive.crawler.framework.Processor;
8 import org.archive.crawler.frontier.AdaptiveRevisitAttributeConstants;
9
10 /***
11 * Set a URI to not be revisited by the ARFrontier. This only makes sense when
12 * using the ARFrontier and a decide-rule chain granting only selected access to
13 * this processor. This is the opposite of the AcceptRevisitProcessor class.
14 *
15 * @author mzsanford
16 */
17 public class RejectRevisitProcessor extends Processor implements
18 AdaptiveRevisitAttributeConstants {
19 private static final long serialVersionUID = 4310432303089418844L;
20
21 private static final Logger logger = Logger
22 .getLogger(RejectRevisitProcessor.class.getName());
23
24 public RejectRevisitProcessor(String name) {
25 super(name, "Set a URI to not be revisited by the ARFrontier.");
26 }
27
28 @Override
29 protected void initialTasks() {
30 CrawlURI.addAlistPersistentMember(A_DISCARD_REVISIT);
31 }
32
33 @Override
34 protected void innerProcess(CrawlURI curi) throws InterruptedException {
35 if (curi != null) {
36 if (logger.isLoggable(Level.FINE)) {
37 logger.fine("Adding DISCARD_REVISIT=true to Crawl URI: "
38 + curi.getUURI().toString());
39 }
40 curi.putObject(A_DISCARD_REVISIT, Boolean.TRUE);
41 }
42 }
43
44 }