1 /* RobotsDirectives.java
2 *
3 * $Id: PrefixSet.java 4947 2007-03-01 04:47:24Z gojomo $
4 *
5 * Created April 29, 2008
6 *
7 * Copyright (C) 2008 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 package org.archive.crawler.datamodel;
26
27 import java.io.Serializable;
28
29 import org.archive.util.PrefixSet;
30
31 /***
32 * Represents the directives that apply to a user-agent (or set of
33 * user-agents)
34 */
35 public class RobotsDirectives implements Serializable {
36 private static final long serialVersionUID = 5386542759286155383L;
37
38 PrefixSet disallows = new PrefixSet();
39 PrefixSet allows = new PrefixSet();
40 float crawlDelay = -1;
41
42 public boolean allows(String path) {
43 if(disallows.containsPrefixOf(path)) {
44 return allows.containsPrefixOf(path);
45 }
46 return true;
47 }
48
49 public void addDisallow(String path) {
50 if(path.length()==0) {
51 // ignore empty-string disallows
52 // (they really mean allow, when alone)
53 return;
54 }
55 disallows.add(path);
56 }
57
58 public void addAllow(String path) {
59 allows.add(path);
60 }
61
62 public void setCrawlDelay(float i) {
63 crawlDelay=i;
64 }
65
66 public float getCrawlDelay() {
67 return crawlDelay;
68 }
69 }