1   /*
2    * ArchiveUtils
3    *
4    * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/util/ArchiveUtils.java,v 1.38 2007/01/23 00:29:48 gojomo Exp $
5    *
6    * Created on Jul 7, 2003
7    *
8    * Copyright (C) 2003 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   *
26   */
27  package org.archive.util;
28  
29  import java.io.IOException;
30  import java.io.PrintWriter;
31  import java.io.StringWriter;
32  import java.text.NumberFormat;
33  import java.text.ParseException;
34  import java.text.SimpleDateFormat;
35  import java.util.Arrays;
36  import java.util.Calendar;
37  import java.util.Date;
38  import java.util.GregorianCalendar;
39  import java.util.HashSet;
40  import java.util.Locale;
41  import java.util.Set;
42  import java.util.TimeZone;
43  
44  /***
45   * Miscellaneous useful methods.
46   *
47   * @author gojomo & others
48   */
49  public class ArchiveUtils {
50  
51      /***
52       * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone.
53       */
54      private static final ThreadLocal<SimpleDateFormat> 
55          TIMESTAMP12 = threadLocalDateFormat("yyyyMMddHHmm");;
56      
57      /***
58       * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone.
59       */
60      private static final ThreadLocal<SimpleDateFormat> 
61         TIMESTAMP14 = threadLocalDateFormat("yyyyMMddHHmmss");
62      /***
63       * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone.
64       */
65      private static final ThreadLocal<SimpleDateFormat> 
66          TIMESTAMP17 = threadLocalDateFormat("yyyyMMddHHmmssSSS");
67  
68      /***
69       * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
70       * UTC time zone is assumed.
71       */
72      private static final ThreadLocal<SimpleDateFormat> 
73          TIMESTAMP17ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
74      
75      /***
76       * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z'
77       * UTC time zone is assumed.
78       */
79      private static final ThreadLocal<SimpleDateFormat>
80          TIMESTAMP14ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
81      
82      /***
83       * Default character to use padding strings.
84       */
85      private static final char DEFAULT_PAD_CHAR = ' ';
86  
87      /*** milliseconds in an hour */ 
88      private static final int HOUR_IN_MS = 60 * 60 * 1000;
89      /*** milliseconds in a day */
90      private static final int DAY_IN_MS = 24 * HOUR_IN_MS;
91      
92      private static ThreadLocal<SimpleDateFormat> threadLocalDateFormat(final String pattern) {
93          ThreadLocal<SimpleDateFormat> tl = new ThreadLocal<SimpleDateFormat>() {
94              protected SimpleDateFormat initialValue() {
95                  SimpleDateFormat df = new SimpleDateFormat(pattern);
96                  df.setTimeZone(TimeZone.getTimeZone("GMT"));
97                  return df;
98              }
99          };
100         return tl;
101     }
102     
103     public static int MAX_INT_CHAR_WIDTH =
104         Integer.toString(Integer.MAX_VALUE).length();
105     
106     /***
107      * Utility function for creating arc-style date stamps
108      * in the format yyyMMddHHmmssSSS.
109      * Date stamps are in the UTC time zone
110      * @return the date stamp
111      */
112     public static String get17DigitDate(){
113         return TIMESTAMP17.get().format(new Date());
114     }
115 
116     /***
117      * Utility function for creating arc-style date stamps
118      * in the format yyyMMddHHmmss.
119      * Date stamps are in the UTC time zone
120      * @return the date stamp
121      */
122     public static String get14DigitDate(){
123         return TIMESTAMP14.get().format(new Date());
124     }
125 
126     /***
127      * Utility function for creating arc-style date stamps
128      * in the format yyyMMddHHmm.
129      * Date stamps are in the UTC time zone
130      * @return the date stamp
131      */
132     public static String get12DigitDate(){
133         return TIMESTAMP12.get().format(new Date());
134     }
135 
136     /***
137      * Utility function for creating log timestamps, in
138      * W3C/ISO8601 format, assuming UTC. Use current time. 
139      * 
140      * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
141      * 
142      * @return the date stamp
143      */
144     public static String getLog17Date(){
145         return TIMESTAMP17ISO8601Z.get().format(new Date());
146     }
147     
148     /***
149      * Utility function for creating log timestamps, in
150      * W3C/ISO8601 format, assuming UTC. 
151      * 
152      * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
153      * @param date Date to format.
154      * 
155      * @return the date stamp
156      */
157     public static String getLog17Date(long date){
158         return TIMESTAMP17ISO8601Z.get().format(new Date(date));
159     }
160     
161     /***
162      * Utility function for creating log timestamps, in
163      * W3C/ISO8601 format, assuming UTC. Use current time. 
164      * 
165      * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
166      * 
167      * @return the date stamp
168      */
169     public static String getLog14Date(){
170         return TIMESTAMP14ISO8601Z.get().format(new Date());
171     }
172     
173     /***
174      * Utility function for creating log timestamps, in
175      * W3C/ISO8601 format, assuming UTC. 
176      * 
177      * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
178      * @param date long timestamp to format.
179      * 
180      * @return the date stamp
181      */
182     public static String getLog14Date(long date){
183         return TIMESTAMP14ISO8601Z.get().format(new Date(date));
184     }
185     
186     /***
187      * Utility function for creating log timestamps, in
188      * W3C/ISO8601 format, assuming UTC. 
189      * 
190      * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
191      * @param date Date to format.
192      * 
193      * @return the date stamp
194      */
195     public static String getLog14Date(Date date){
196         return TIMESTAMP14ISO8601Z.get().format(date);
197     }
198     
199     /***
200      * Utility function for creating arc-style date stamps
201      * in the format yyyyMMddHHmmssSSS.
202      * Date stamps are in the UTC time zone
203      *
204      * @param date milliseconds since epoc
205      * @return the date stamp
206      */
207     public static String get17DigitDate(long date){
208         return TIMESTAMP17.get().format(new Date(date));
209     }
210     
211     public static String get17DigitDate(Date date){
212         return TIMESTAMP17.get().format(date);
213     }
214 
215     /***
216      * Utility function for creating arc-style date stamps
217      * in the format yyyyMMddHHmmss.
218      * Date stamps are in the UTC time zone
219      *
220      * @param date milliseconds since epoc
221      * @return the date stamp
222      */
223     public static String get14DigitDate(long date){
224         return TIMESTAMP14.get().format(new Date(date));
225     }
226 
227     public static String get14DigitDate(Date d) {
228         return TIMESTAMP14.get().format(d);
229     }
230 
231     /***
232      * Utility function for creating arc-style date stamps
233      * in the format yyyyMMddHHmm.
234      * Date stamps are in the UTC time zone
235      *
236      * @param date milliseconds since epoc
237      * @return the date stamp
238      */
239     public static String get12DigitDate(long date){
240         return TIMESTAMP12.get().format(new Date(date));
241     }
242     
243     public static String get12DigitDate(Date d) {
244         return TIMESTAMP12.get().format(d);
245     }
246     
247     /***
248      * Parses an ARC-style date.  If passed String is < 12 characters in length,
249      * we pad.  At a minimum, String should contain a year (>=4 characters).
250      * Parse will also fail if day or month are incompletely specified.  Depends
251      * on the above getXXDigitDate methods.
252      * @param A 4-17 digit date in ARC style (<code>yyyy</code> to
253      * <code>yyyyMMddHHmmssSSS</code>) formatting.  
254      * @return A Date object representing the passed String. 
255      * @throws ParseException
256      */
257     public static Date getDate(String d) throws ParseException {
258         Date date = null;
259         if (d == null) {
260             throw new IllegalArgumentException("Passed date is null");
261         }
262         switch (d.length()) {
263         case 14:
264             date = ArchiveUtils.parse14DigitDate(d);
265             break;
266 
267         case 17:
268             date = ArchiveUtils.parse17DigitDate(d);
269             break;
270 
271         case 12:
272             date = ArchiveUtils.parse12DigitDate(d);
273             break;
274            
275         case 0:
276         case 1:
277         case 2:
278         case 3:
279             throw new ParseException("Date string must at least contain a" +
280                 "year: " + d, d.length());
281             
282         default:
283             if (!(d.startsWith("19") || d.startsWith("20"))) {
284                 throw new ParseException("Unrecognized century: " + d, 0);
285             }
286             if (d.length() < 8 && (d.length() % 2) != 0) {
287                 throw new ParseException("Incomplete month/date: " + d,
288                     d.length());
289             }
290             StringBuilder sb = new StringBuilder(d);
291             if (sb.length() < 8) {
292                 for (int i = sb.length(); sb.length() < 8; i += 2) {
293                     sb.append("01");
294                 }
295             }
296             if (sb.length() < 12) {
297                 for (int i = sb.length(); sb.length() < 12; i++) {
298                     sb.append("0");
299                 }
300             }
301             date = ArchiveUtils.parse12DigitDate(sb.toString());
302         }
303 
304         return date;
305     }
306 
307     /***
308      * Utility function for parsing arc-style date stamps
309      * in the format yyyMMddHHmmssSSS.
310      * Date stamps are in the UTC time zone.  The whole string will not be
311      * parsed, only the first 17 digits.
312      *
313      * @param date an arc-style formatted date stamp
314      * @return the Date corresponding to the date stamp string
315      * @throws ParseException if the inputstring was malformed
316      */
317     public static Date parse17DigitDate(String date) throws ParseException {
318         return TIMESTAMP17.get().parse(date);
319     }
320 
321     /***
322      * Utility function for parsing arc-style date stamps
323      * in the format yyyMMddHHmmss.
324      * Date stamps are in the UTC time zone.  The whole string will not be
325      * parsed, only the first 14 digits.
326      *
327      * @param date an arc-style formatted date stamp
328      * @return the Date corresponding to the date stamp string
329      * @throws ParseException if the inputstring was malformed
330      */
331     public static Date parse14DigitDate(String date) throws ParseException{
332         return TIMESTAMP14.get().parse(date);
333     }
334 
335     /***
336      * Utility function for parsing arc-style date stamps
337      * in the format yyyMMddHHmm.
338      * Date stamps are in the UTC time zone.  The whole string will not be
339      * parsed, only the first 12 digits.
340      *
341      * @param date an arc-style formatted date stamp
342      * @return the Date corresponding to the date stamp string
343      * @throws ParseException if the inputstring was malformed
344      */
345     public static Date parse12DigitDate(String date) throws ParseException{
346         return TIMESTAMP12.get().parse(date);
347     }
348     
349     /***
350      * Convert 17-digit date format timestamps (as found in crawl.log, for
351      * example) into a GregorianCalendar object. + * Useful so you can convert
352      * into milliseconds-since-epoch. Note: it is possible to compute
353      * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but
354      * that method is deprecated in favor of using Calendar.getTimeInMillis(). + *
355      * <p/>I probably should have dug into all the utility methods in
356      * DateFormat.java to parse the timestamp, but this was + * easier. If
357      * someone wants to fix this to use those methods, please have at it! <p/>
358      * Mike Schwartz, schwartz at CodeOnTheRoad dot com.
359      * 
360      * @param timestamp17String
361      * @return Calendar set to <code>timestamp17String</code>.
362      */
363     public static Calendar timestamp17ToCalendar(String timestamp17String) {
364         GregorianCalendar calendar = new GregorianCalendar();
365         int year = Integer.parseInt(timestamp17String.substring(0, 4));
366         int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8));
367         // Month is 0-based
368         int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1;
369         int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10));
370         int minute = Integer.parseInt(timestamp17String.substring(10, 12));
371         int second = Integer.parseInt(timestamp17String.substring(12, 14));
372         int milliseconds = Integer
373                 .parseInt(timestamp17String.substring(14, 17));
374         calendar.set(Calendar.YEAR, year);
375         calendar.set(Calendar.MONTH, month);
376         calendar.set(Calendar.DAY_OF_MONTH, dayOfMonth);
377         calendar.set(Calendar.HOUR_OF_DAY, hourOfDay);
378         calendar.set(Calendar.MINUTE, minute);
379         calendar.set(Calendar.SECOND, second);
380         calendar.set(Calendar.MILLISECOND, milliseconds);
381         return calendar;
382     }
383     
384     /***
385      * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
386      * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
387      * @return Seconds since the epoch as a string zero-pre-padded so always
388      * Integer.MAX_VALUE wide (Makes it so sorting of resultant string works
389      * properly).
390      * @throws ParseException 
391      */
392     public static String secondsSinceEpoch(String timestamp)
393     throws ParseException {
394         return zeroPadInteger((int)
395             (getSecondsSinceEpoch(timestamp).getTime()/1000));
396     }
397     
398     /***
399      * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
400      * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
401      * @return A date.
402      * @see #secondsSinceEpoch(String)
403      * @throws ParseException 
404      */
405     public static Date getSecondsSinceEpoch(String timestamp)
406     throws ParseException {
407         if (timestamp.length() < 14) {
408             if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) {
409                 throw new IllegalArgumentException("Must have year, " +
410                     "month, date, hour or second granularity: " + timestamp);
411             }
412             if (timestamp.length() == 4) {
413                 // Add first month and first date.
414                 timestamp = timestamp + "01010000";
415             }
416             if (timestamp.length() == 6) {
417                 // Add a date of the first.
418                 timestamp = timestamp + "010000";
419             }
420             if (timestamp.length() < 14) {
421                 timestamp = timestamp +
422                     ArchiveUtils.padTo("", 14 - timestamp.length(), '0');
423             }
424         }
425         return ArchiveUtils.parse14DigitDate(timestamp);
426     }
427     
428     /***
429      * @param i Integer to add prefix of zeros too.  If passed
430      * 2005, will return the String <code>0000002005</code>. String
431      * width is the width of Integer.MAX_VALUE as a string (10
432      * digits).
433      * @return Padded String version of <code>i</code>.
434      */
435     public static String zeroPadInteger(int i) {
436         return ArchiveUtils.padTo(Integer.toString(i),
437                 MAX_INT_CHAR_WIDTH, '0');
438     }
439 
440     /*** 
441      * Convert an <code>int</code> to a <code>String</code>, and pad it to
442      * <code>pad</code> spaces.
443      * @param i the int
444      * @param pad the width to pad to.
445      * @return String w/ padding.
446      */
447     public static String padTo(final int i, final int pad) {
448         String n = Integer.toString(i);
449         return padTo(n, pad);
450     }
451     
452     /*** 
453      * Pad the given <code>String</code> to <code>pad</code> characters wide
454      * by pre-pending spaces.  <code>s</code> should not be <code>null</code>.
455      * If <code>s</code> is already wider than <code>pad</code> no change is
456      * done.
457      *
458      * @param s the String to pad
459      * @param pad the width to pad to.
460      * @return String w/ padding.
461      */
462     public static String padTo(final String s, final int pad) {
463         return padTo(s, pad, DEFAULT_PAD_CHAR);
464     }
465 
466     /*** 
467      * Pad the given <code>String</code> to <code>pad</code> characters wide
468      * by pre-pending <code>padChar</code>.
469      * 
470      * <code>s</code> should not be <code>null</code>. If <code>s</code> is
471      * already wider than <code>pad</code> no change is done.
472      *
473      * @param s the String to pad
474      * @param pad the width to pad to.
475      * @param padChar The pad character to use.
476      * @return String w/ padding.
477      */
478     public static String padTo(final String s, final int pad,
479             final char padChar) {
480         String result = s;
481         int l = s.length();
482         if (l < pad) {
483             StringBuffer sb = new StringBuffer(pad);
484             while(l < pad) {
485                 sb.append(padChar);
486                 l++;
487             }
488             sb.append(s);
489             result = sb.toString();
490         }
491         return result;
492     }
493 
494     /*** check that two byte arrays are equal.  They may be <code>null</code>.
495      *
496      * @param lhs a byte array
497      * @param rhs another byte array.
498      * @return <code>true</code> if they are both equal (or both
499      * <code>null</code>)
500      */
501     public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) {
502         if (lhs == null && rhs != null || lhs != null && rhs == null) {
503             return false;
504         }
505         if (lhs==rhs) {
506             return true;
507         }
508         if (lhs.length != rhs.length) {
509             return false;
510         }
511         for(int i = 0; i<lhs.length; i++) {
512             if (lhs[i]!=rhs[i]) {
513                 return false;
514             }
515         }
516         return true;
517     }
518 
519     /***
520      * Converts a double to a string.
521      * @param val The double to convert
522      * @param precision How many characters to include after '.'
523      * @return the double as a string.
524      */
525     public static String doubleToString(double val, int maxFractionDigits){
526         return doubleToString(val, maxFractionDigits, 0);
527     }
528 
529     private static String doubleToString(double val, int maxFractionDigits, int minFractionDigits) {
530         NumberFormat f = NumberFormat.getNumberInstance(Locale.US); 
531         f.setMaximumFractionDigits(maxFractionDigits);
532         f.setMinimumFractionDigits(minFractionDigits);
533         return f.format(val); 
534     }
535 
536     /***
537      * Takes a byte size and formats it for display with 'friendly' units. 
538      * <p>
539      * This involves converting it to the largest unit 
540      * (of B, KB, MB, GB, TB) for which the amount will be > 1.
541      * <p>
542      * Additionally, at least 2 significant digits are always displayed. 
543      * <p>
544      * Displays as bytes (B): 0-1023
545      * Displays as kilobytes (KB): 1024 - 2097151 (~2Mb)
546      * Displays as megabytes (MB): 2097152 - 4294967295 (~4Gb)
547      * Displays as gigabytes (GB): 4294967296 - infinity
548      * <p>
549      * Negative numbers will be returned as '0 B'.
550      *
551      * @param amount the amount of bytes
552      * @return A string containing the amount, properly formated.
553      */
554     public static String formatBytesForDisplay(long amount) {
555         double displayAmount = (double) amount;
556         int unitPowerOf1024 = 0; 
557 
558         if(amount <= 0){
559             return "0 B";
560         }
561         
562         while(displayAmount>=1024 && unitPowerOf1024 < 4) {
563             displayAmount = displayAmount / 1024;
564             unitPowerOf1024++;
565         }
566         
567         // TODO: get didactic, make these KiB, MiB, GiB, TiB
568         final String[] units = { " B", " KB", " MB", " GB", " TB" };
569         
570         // ensure at least 2 significant digits (#.#) for small displayValues
571         int fractionDigits = (displayAmount < 10) ? 1 : 0; 
572         return doubleToString(displayAmount, fractionDigits, fractionDigits) 
573                    + units[unitPowerOf1024];
574     }
575 
576     /***
577      * Convert milliseconds value to a human-readable duration
578      * @param time
579      * @return Human readable string version of passed <code>time</code>
580      */
581     public static String formatMillisecondsToConventional(long time) {
582         return formatMillisecondsToConventional(time,true);
583     }
584     
585     /***
586      * Convert milliseconds value to a human-readable duration
587      * @param time
588      * @param toMs whether to print to the ms
589      * @return Human readable string version of passed <code>time</code>
590      */
591     public static String formatMillisecondsToConventional(long time, boolean toMs) {
592         StringBuffer sb = new StringBuffer();
593         if(time<0) {
594             sb.append("-");
595         }
596         long absTime = Math.abs(time);
597         if(!toMs && absTime < 1000) {
598             return "0s";
599         }
600         if(absTime > DAY_IN_MS) {
601             // days
602             sb.append(absTime / DAY_IN_MS + "d");
603             absTime = absTime % DAY_IN_MS;
604         }
605         if (absTime > HOUR_IN_MS) {
606             //got hours.
607             sb.append(absTime / HOUR_IN_MS + "h");
608             absTime = absTime % HOUR_IN_MS;
609         }
610         if (absTime > 60000) {
611             sb.append(absTime / 60000 + "m");
612             absTime = absTime % 60000;
613         }
614         if (absTime > 1000) {
615             sb.append(absTime / 1000 + "s");
616             absTime = absTime % 1000;
617         }
618         if(toMs) {
619             sb.append(absTime + "ms");
620         }
621         return sb.toString();
622     }
623 
624 
625     /***
626      * Generate a long UID based on the given class and version number.
627      * Using this instead of the default will assume serialization
628      * compatibility across class changes unless version number is
629      * intentionally bumped.
630      *
631      * @param class1
632      * @param version
633      * @return UID based off class and version number.
634      */
635     public static long classnameBasedUID(Class class1, int version) {
636         String callingClassname = class1.getName();
637         return (long)callingClassname.hashCode() << 32 + version;
638     }
639     
640     /***
641      * Copy the raw bytes of a long into a byte array, starting at
642      * the specified offset.
643      * 
644      * @param l
645      * @param array
646      * @param offset
647      */
648     public static void longIntoByteArray(long l, byte[] array, int offset) {
649         int i, shift;
650                   
651         for(i = 0, shift = 56; i < 8; i++, shift -= 8)
652         array[offset+i] = (byte)(0xFF & (l >> shift));
653     }
654     
655     public static long byteArrayIntoLong(byte [] bytearray) {
656         return byteArrayIntoLong(bytearray, 0);
657     }
658     
659     /***
660      * Byte array into long.
661      * @param bytearray Array to convert to a long.
662      * @param offset Offset into array at which we start decoding the long.
663      * @return Long made of the bytes of <code>array</code> beginning at
664      * offset <code>offset</code>.
665      * @see #longIntoByteArray(long, byte[], int)
666      */
667     public static long byteArrayIntoLong(byte [] bytearray,
668             int offset) {
669         long result = 0;
670         for (int i = offset; i < 8 /*Bytes in long*/; i++) {
671             result = (result << 8 /*Bits in byte*/) |
672                 (0xff & (byte)(bytearray[i] & 0xff));
673         }
674         return result;
675     }
676 
677     /***
678      * Given a string that may be a plain host or host/path (without
679      * URI scheme), add an implied http:// if necessary. 
680      * 
681      * @param u string to evaluate
682      * @return string with http:// added if no scheme already present
683      */
684     public static String addImpliedHttpIfNecessary(String u) {
685         if(u.indexOf(':') == -1 || u.indexOf('.') < u.indexOf(':')) {
686             // No scheme present; prepend "http://"
687             u = "http://" + u;
688         }
689         return u;
690     }
691 
692     /***
693      * Verify that the array begins with the prefix. 
694      * 
695      * @param array
696      * @param prefix
697      * @return true if array is identical to prefix for the first prefix.length
698      * positions 
699      */
700     public static boolean startsWith(byte[] array, byte[] prefix) {
701         if(prefix.length>array.length) {
702             return false;
703         }
704         for(int i = 0; i < prefix.length; i++) {
705             if(array[i]!=prefix[i]) {
706                 return false; 
707             }
708         }
709         return true; 
710     }
711 
712     /***
713      * Utility method to get a String singleLineReport from Reporter
714      * @param rep  Reporter to get singleLineReport from
715      * @return String of report
716      */
717     public static String singleLineReport(Reporter rep) {
718         StringWriter sw = new StringWriter();
719         PrintWriter pw = new PrintWriter(sw);
720         try {
721             rep.singleLineReportTo(pw);
722         } catch (IOException e) {
723             // not really possible
724             e.printStackTrace();
725         }
726         pw.flush();
727         return sw.toString();
728     }
729 
730     /***
731      * Compose the requested report into a String. DANGEROUS IF REPORT
732      * CAN BE LARGE.
733      * 
734      * @param rep Reported
735      * @param name String name of report to compose
736      * @return String of report
737      */
738     public static String writeReportToString(Reporter rep, String name) {
739         StringWriter sw = new StringWriter();
740         PrintWriter pw = new PrintWriter(sw);
741         rep.reportTo(name,pw);
742         pw.flush();
743         return sw.toString();
744     }
745     
746     public static Set<String> TLDS;
747     
748     static {
749         TLDS = new HashSet<String>();
750         // from http://data.iana.org/TLD/tlds-alpha-by-domain.txt
751         // # Version 2008071601, Last Updated Thu Jul 17 08:07:01 2008 UTC
752         String[] tldsArray = { "AC", "AD", "AE", "AERO", "AF", "AG", "AI",
753                 "AL", "AM", "AN", "AO", "AQ", "AR", "ARPA", "AS", "ASIA", "AT",
754                 "AU", "AW", "AX", "AZ", "BA", "BB", "BD", "BE", "BF", "BG",
755                 "BH", "BI", "BIZ", "BJ", "BM", "BN", "BO", "BR", "BS", "BT",
756                 "BV", "BW", "BY", "BZ", "CA", "CAT", "CC", "CD", "CF", "CG",
757                 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "COM", "COOP", "CR",
758                 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",
759                 "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", "EU", "FI",
760                 "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF",
761                 "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR",
762                 "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT",
763                 "HU", "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ",
764                 "IR", "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG",
765                 "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
766                 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",
767                 "MA", "MC", "MD", "ME", "MG", "MH", "MIL", "MK", "ML", "MM",
768                 "MN", "MO", "MOBI", "MP", "MQ", "MR", "MS", "MT", "MU",
769                 "MUSEUM", "MV", "MW", "MX", "MY", "MZ", "NA", "NAME", "NC",
770                 "NE", "NET", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU",
771                 "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", "PH", "PK", "PL",
772                 "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", "QA", "RE",
773                 "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG",
774                 "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST",
775                 "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH",
776                 "TJ", "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT",
777                 "TV", "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA",
778                 "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D",
779                 "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A",
780                 "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA",
781                 "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV",
782                 "XN--ZCKZAH", "YE", "YT", "YU", "ZA", "ZM", "ZW" };
783         TLDS.addAll(Arrays.asList(tldsArray));
784     }
785     /***
786      * Return whether the given string represents a known 
787      * top-level-domain (like "com", "org", etc.) per IANA
788      * as of 2008071601. 
789      * 
790      * @param dom candidate string
791      * @return boolean true if recognized as TLD
792      */
793     public static boolean isTld(String dom) {
794         return TLDS.contains(dom.toUpperCase());
795     }
796 }
797