1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.archive.util;
28
29 import java.io.IOException;
30 import java.io.PrintWriter;
31 import java.io.StringWriter;
32 import java.text.NumberFormat;
33 import java.text.ParseException;
34 import java.text.SimpleDateFormat;
35 import java.util.Arrays;
36 import java.util.Calendar;
37 import java.util.Date;
38 import java.util.GregorianCalendar;
39 import java.util.HashSet;
40 import java.util.Locale;
41 import java.util.Set;
42 import java.util.TimeZone;
43
44 /***
45 * Miscellaneous useful methods.
46 *
47 * @author gojomo & others
48 */
49 public class ArchiveUtils {
50
51 /***
52 * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone.
53 */
54 private static final ThreadLocal<SimpleDateFormat>
55 TIMESTAMP12 = threadLocalDateFormat("yyyyMMddHHmm");;
56
57 /***
58 * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone.
59 */
60 private static final ThreadLocal<SimpleDateFormat>
61 TIMESTAMP14 = threadLocalDateFormat("yyyyMMddHHmmss");
62 /***
63 * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone.
64 */
65 private static final ThreadLocal<SimpleDateFormat>
66 TIMESTAMP17 = threadLocalDateFormat("yyyyMMddHHmmssSSS");
67
68 /***
69 * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
70 * UTC time zone is assumed.
71 */
72 private static final ThreadLocal<SimpleDateFormat>
73 TIMESTAMP17ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
74
75 /***
76 * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z'
77 * UTC time zone is assumed.
78 */
79 private static final ThreadLocal<SimpleDateFormat>
80 TIMESTAMP14ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
81
82 /***
83 * Default character to use padding strings.
84 */
85 private static final char DEFAULT_PAD_CHAR = ' ';
86
87 /*** milliseconds in an hour */
88 private static final int HOUR_IN_MS = 60 * 60 * 1000;
89 /*** milliseconds in a day */
90 private static final int DAY_IN_MS = 24 * HOUR_IN_MS;
91
92 private static ThreadLocal<SimpleDateFormat> threadLocalDateFormat(final String pattern) {
93 ThreadLocal<SimpleDateFormat> tl = new ThreadLocal<SimpleDateFormat>() {
94 protected SimpleDateFormat initialValue() {
95 SimpleDateFormat df = new SimpleDateFormat(pattern);
96 df.setTimeZone(TimeZone.getTimeZone("GMT"));
97 return df;
98 }
99 };
100 return tl;
101 }
102
103 public static int MAX_INT_CHAR_WIDTH =
104 Integer.toString(Integer.MAX_VALUE).length();
105
106 /***
107 * Utility function for creating arc-style date stamps
108 * in the format yyyMMddHHmmssSSS.
109 * Date stamps are in the UTC time zone
110 * @return the date stamp
111 */
112 public static String get17DigitDate(){
113 return TIMESTAMP17.get().format(new Date());
114 }
115
116 /***
117 * Utility function for creating arc-style date stamps
118 * in the format yyyMMddHHmmss.
119 * Date stamps are in the UTC time zone
120 * @return the date stamp
121 */
122 public static String get14DigitDate(){
123 return TIMESTAMP14.get().format(new Date());
124 }
125
126 /***
127 * Utility function for creating arc-style date stamps
128 * in the format yyyMMddHHmm.
129 * Date stamps are in the UTC time zone
130 * @return the date stamp
131 */
132 public static String get12DigitDate(){
133 return TIMESTAMP12.get().format(new Date());
134 }
135
136 /***
137 * Utility function for creating log timestamps, in
138 * W3C/ISO8601 format, assuming UTC. Use current time.
139 *
140 * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
141 *
142 * @return the date stamp
143 */
144 public static String getLog17Date(){
145 return TIMESTAMP17ISO8601Z.get().format(new Date());
146 }
147
148 /***
149 * Utility function for creating log timestamps, in
150 * W3C/ISO8601 format, assuming UTC.
151 *
152 * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
153 * @param date Date to format.
154 *
155 * @return the date stamp
156 */
157 public static String getLog17Date(long date){
158 return TIMESTAMP17ISO8601Z.get().format(new Date(date));
159 }
160
161 /***
162 * Utility function for creating log timestamps, in
163 * W3C/ISO8601 format, assuming UTC. Use current time.
164 *
165 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
166 *
167 * @return the date stamp
168 */
169 public static String getLog14Date(){
170 return TIMESTAMP14ISO8601Z.get().format(new Date());
171 }
172
173 /***
174 * Utility function for creating log timestamps, in
175 * W3C/ISO8601 format, assuming UTC.
176 *
177 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
178 * @param date long timestamp to format.
179 *
180 * @return the date stamp
181 */
182 public static String getLog14Date(long date){
183 return TIMESTAMP14ISO8601Z.get().format(new Date(date));
184 }
185
186 /***
187 * Utility function for creating log timestamps, in
188 * W3C/ISO8601 format, assuming UTC.
189 *
190 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
191 * @param date Date to format.
192 *
193 * @return the date stamp
194 */
195 public static String getLog14Date(Date date){
196 return TIMESTAMP14ISO8601Z.get().format(date);
197 }
198
199 /***
200 * Utility function for creating arc-style date stamps
201 * in the format yyyyMMddHHmmssSSS.
202 * Date stamps are in the UTC time zone
203 *
204 * @param date milliseconds since epoc
205 * @return the date stamp
206 */
207 public static String get17DigitDate(long date){
208 return TIMESTAMP17.get().format(new Date(date));
209 }
210
211 public static String get17DigitDate(Date date){
212 return TIMESTAMP17.get().format(date);
213 }
214
215 /***
216 * Utility function for creating arc-style date stamps
217 * in the format yyyyMMddHHmmss.
218 * Date stamps are in the UTC time zone
219 *
220 * @param date milliseconds since epoc
221 * @return the date stamp
222 */
223 public static String get14DigitDate(long date){
224 return TIMESTAMP14.get().format(new Date(date));
225 }
226
227 public static String get14DigitDate(Date d) {
228 return TIMESTAMP14.get().format(d);
229 }
230
231 /***
232 * Utility function for creating arc-style date stamps
233 * in the format yyyyMMddHHmm.
234 * Date stamps are in the UTC time zone
235 *
236 * @param date milliseconds since epoc
237 * @return the date stamp
238 */
239 public static String get12DigitDate(long date){
240 return TIMESTAMP12.get().format(new Date(date));
241 }
242
243 public static String get12DigitDate(Date d) {
244 return TIMESTAMP12.get().format(d);
245 }
246
247 /***
248 * Parses an ARC-style date. If passed String is < 12 characters in length,
249 * we pad. At a minimum, String should contain a year (>=4 characters).
250 * Parse will also fail if day or month are incompletely specified. Depends
251 * on the above getXXDigitDate methods.
252 * @param A 4-17 digit date in ARC style (<code>yyyy</code> to
253 * <code>yyyyMMddHHmmssSSS</code>) formatting.
254 * @return A Date object representing the passed String.
255 * @throws ParseException
256 */
257 public static Date getDate(String d) throws ParseException {
258 Date date = null;
259 if (d == null) {
260 throw new IllegalArgumentException("Passed date is null");
261 }
262 switch (d.length()) {
263 case 14:
264 date = ArchiveUtils.parse14DigitDate(d);
265 break;
266
267 case 17:
268 date = ArchiveUtils.parse17DigitDate(d);
269 break;
270
271 case 12:
272 date = ArchiveUtils.parse12DigitDate(d);
273 break;
274
275 case 0:
276 case 1:
277 case 2:
278 case 3:
279 throw new ParseException("Date string must at least contain a" +
280 "year: " + d, d.length());
281
282 default:
283 if (!(d.startsWith("19") || d.startsWith("20"))) {
284 throw new ParseException("Unrecognized century: " + d, 0);
285 }
286 if (d.length() < 8 && (d.length() % 2) != 0) {
287 throw new ParseException("Incomplete month/date: " + d,
288 d.length());
289 }
290 StringBuilder sb = new StringBuilder(d);
291 if (sb.length() < 8) {
292 for (int i = sb.length(); sb.length() < 8; i += 2) {
293 sb.append("01");
294 }
295 }
296 if (sb.length() < 12) {
297 for (int i = sb.length(); sb.length() < 12; i++) {
298 sb.append("0");
299 }
300 }
301 date = ArchiveUtils.parse12DigitDate(sb.toString());
302 }
303
304 return date;
305 }
306
307 /***
308 * Utility function for parsing arc-style date stamps
309 * in the format yyyMMddHHmmssSSS.
310 * Date stamps are in the UTC time zone. The whole string will not be
311 * parsed, only the first 17 digits.
312 *
313 * @param date an arc-style formatted date stamp
314 * @return the Date corresponding to the date stamp string
315 * @throws ParseException if the inputstring was malformed
316 */
317 public static Date parse17DigitDate(String date) throws ParseException {
318 return TIMESTAMP17.get().parse(date);
319 }
320
321 /***
322 * Utility function for parsing arc-style date stamps
323 * in the format yyyMMddHHmmss.
324 * Date stamps are in the UTC time zone. The whole string will not be
325 * parsed, only the first 14 digits.
326 *
327 * @param date an arc-style formatted date stamp
328 * @return the Date corresponding to the date stamp string
329 * @throws ParseException if the inputstring was malformed
330 */
331 public static Date parse14DigitDate(String date) throws ParseException{
332 return TIMESTAMP14.get().parse(date);
333 }
334
335 /***
336 * Utility function for parsing arc-style date stamps
337 * in the format yyyMMddHHmm.
338 * Date stamps are in the UTC time zone. The whole string will not be
339 * parsed, only the first 12 digits.
340 *
341 * @param date an arc-style formatted date stamp
342 * @return the Date corresponding to the date stamp string
343 * @throws ParseException if the inputstring was malformed
344 */
345 public static Date parse12DigitDate(String date) throws ParseException{
346 return TIMESTAMP12.get().parse(date);
347 }
348
349 /***
350 * Convert 17-digit date format timestamps (as found in crawl.log, for
351 * example) into a GregorianCalendar object. + * Useful so you can convert
352 * into milliseconds-since-epoch. Note: it is possible to compute
353 * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but
354 * that method is deprecated in favor of using Calendar.getTimeInMillis(). + *
355 * <p/>I probably should have dug into all the utility methods in
356 * DateFormat.java to parse the timestamp, but this was + * easier. If
357 * someone wants to fix this to use those methods, please have at it! <p/>
358 * Mike Schwartz, schwartz at CodeOnTheRoad dot com.
359 *
360 * @param timestamp17String
361 * @return Calendar set to <code>timestamp17String</code>.
362 */
363 public static Calendar timestamp17ToCalendar(String timestamp17String) {
364 GregorianCalendar calendar = new GregorianCalendar();
365 int year = Integer.parseInt(timestamp17String.substring(0, 4));
366 int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8));
367
368 int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1;
369 int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10));
370 int minute = Integer.parseInt(timestamp17String.substring(10, 12));
371 int second = Integer.parseInt(timestamp17String.substring(12, 14));
372 int milliseconds = Integer
373 .parseInt(timestamp17String.substring(14, 17));
374 calendar.set(Calendar.YEAR, year);
375 calendar.set(Calendar.MONTH, month);
376 calendar.set(Calendar.DAY_OF_MONTH, dayOfMonth);
377 calendar.set(Calendar.HOUR_OF_DAY, hourOfDay);
378 calendar.set(Calendar.MINUTE, minute);
379 calendar.set(Calendar.SECOND, second);
380 calendar.set(Calendar.MILLISECOND, milliseconds);
381 return calendar;
382 }
383
384 /***
385 * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
386 * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
387 * @return Seconds since the epoch as a string zero-pre-padded so always
388 * Integer.MAX_VALUE wide (Makes it so sorting of resultant string works
389 * properly).
390 * @throws ParseException
391 */
392 public static String secondsSinceEpoch(String timestamp)
393 throws ParseException {
394 return zeroPadInteger((int)
395 (getSecondsSinceEpoch(timestamp).getTime()/1000));
396 }
397
398 /***
399 * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
400 * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
401 * @return A date.
402 * @see #secondsSinceEpoch(String)
403 * @throws ParseException
404 */
405 public static Date getSecondsSinceEpoch(String timestamp)
406 throws ParseException {
407 if (timestamp.length() < 14) {
408 if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) {
409 throw new IllegalArgumentException("Must have year, " +
410 "month, date, hour or second granularity: " + timestamp);
411 }
412 if (timestamp.length() == 4) {
413
414 timestamp = timestamp + "01010000";
415 }
416 if (timestamp.length() == 6) {
417
418 timestamp = timestamp + "010000";
419 }
420 if (timestamp.length() < 14) {
421 timestamp = timestamp +
422 ArchiveUtils.padTo("", 14 - timestamp.length(), '0');
423 }
424 }
425 return ArchiveUtils.parse14DigitDate(timestamp);
426 }
427
428 /***
429 * @param i Integer to add prefix of zeros too. If passed
430 * 2005, will return the String <code>0000002005</code>. String
431 * width is the width of Integer.MAX_VALUE as a string (10
432 * digits).
433 * @return Padded String version of <code>i</code>.
434 */
435 public static String zeroPadInteger(int i) {
436 return ArchiveUtils.padTo(Integer.toString(i),
437 MAX_INT_CHAR_WIDTH, '0');
438 }
439
440 /***
441 * Convert an <code>int</code> to a <code>String</code>, and pad it to
442 * <code>pad</code> spaces.
443 * @param i the int
444 * @param pad the width to pad to.
445 * @return String w/ padding.
446 */
447 public static String padTo(final int i, final int pad) {
448 String n = Integer.toString(i);
449 return padTo(n, pad);
450 }
451
452 /***
453 * Pad the given <code>String</code> to <code>pad</code> characters wide
454 * by pre-pending spaces. <code>s</code> should not be <code>null</code>.
455 * If <code>s</code> is already wider than <code>pad</code> no change is
456 * done.
457 *
458 * @param s the String to pad
459 * @param pad the width to pad to.
460 * @return String w/ padding.
461 */
462 public static String padTo(final String s, final int pad) {
463 return padTo(s, pad, DEFAULT_PAD_CHAR);
464 }
465
466 /***
467 * Pad the given <code>String</code> to <code>pad</code> characters wide
468 * by pre-pending <code>padChar</code>.
469 *
470 * <code>s</code> should not be <code>null</code>. If <code>s</code> is
471 * already wider than <code>pad</code> no change is done.
472 *
473 * @param s the String to pad
474 * @param pad the width to pad to.
475 * @param padChar The pad character to use.
476 * @return String w/ padding.
477 */
478 public static String padTo(final String s, final int pad,
479 final char padChar) {
480 String result = s;
481 int l = s.length();
482 if (l < pad) {
483 StringBuffer sb = new StringBuffer(pad);
484 while(l < pad) {
485 sb.append(padChar);
486 l++;
487 }
488 sb.append(s);
489 result = sb.toString();
490 }
491 return result;
492 }
493
494 /*** check that two byte arrays are equal. They may be <code>null</code>.
495 *
496 * @param lhs a byte array
497 * @param rhs another byte array.
498 * @return <code>true</code> if they are both equal (or both
499 * <code>null</code>)
500 */
501 public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) {
502 if (lhs == null && rhs != null || lhs != null && rhs == null) {
503 return false;
504 }
505 if (lhs==rhs) {
506 return true;
507 }
508 if (lhs.length != rhs.length) {
509 return false;
510 }
511 for(int i = 0; i<lhs.length; i++) {
512 if (lhs[i]!=rhs[i]) {
513 return false;
514 }
515 }
516 return true;
517 }
518
519 /***
520 * Converts a double to a string.
521 * @param val The double to convert
522 * @param precision How many characters to include after '.'
523 * @return the double as a string.
524 */
525 public static String doubleToString(double val, int maxFractionDigits){
526 return doubleToString(val, maxFractionDigits, 0);
527 }
528
529 private static String doubleToString(double val, int maxFractionDigits, int minFractionDigits) {
530 NumberFormat f = NumberFormat.getNumberInstance(Locale.US);
531 f.setMaximumFractionDigits(maxFractionDigits);
532 f.setMinimumFractionDigits(minFractionDigits);
533 return f.format(val);
534 }
535
536 /***
537 * Takes a byte size and formats it for display with 'friendly' units.
538 * <p>
539 * This involves converting it to the largest unit
540 * (of B, KB, MB, GB, TB) for which the amount will be > 1.
541 * <p>
542 * Additionally, at least 2 significant digits are always displayed.
543 * <p>
544 * Displays as bytes (B): 0-1023
545 * Displays as kilobytes (KB): 1024 - 2097151 (~2Mb)
546 * Displays as megabytes (MB): 2097152 - 4294967295 (~4Gb)
547 * Displays as gigabytes (GB): 4294967296 - infinity
548 * <p>
549 * Negative numbers will be returned as '0 B'.
550 *
551 * @param amount the amount of bytes
552 * @return A string containing the amount, properly formated.
553 */
554 public static String formatBytesForDisplay(long amount) {
555 double displayAmount = (double) amount;
556 int unitPowerOf1024 = 0;
557
558 if(amount <= 0){
559 return "0 B";
560 }
561
562 while(displayAmount>=1024 && unitPowerOf1024 < 4) {
563 displayAmount = displayAmount / 1024;
564 unitPowerOf1024++;
565 }
566
567
568 final String[] units = { " B", " KB", " MB", " GB", " TB" };
569
570
571 int fractionDigits = (displayAmount < 10) ? 1 : 0;
572 return doubleToString(displayAmount, fractionDigits, fractionDigits)
573 + units[unitPowerOf1024];
574 }
575
576 /***
577 * Convert milliseconds value to a human-readable duration
578 * @param time
579 * @return Human readable string version of passed <code>time</code>
580 */
581 public static String formatMillisecondsToConventional(long time) {
582 return formatMillisecondsToConventional(time,true);
583 }
584
585 /***
586 * Convert milliseconds value to a human-readable duration
587 * @param time
588 * @param toMs whether to print to the ms
589 * @return Human readable string version of passed <code>time</code>
590 */
591 public static String formatMillisecondsToConventional(long time, boolean toMs) {
592 StringBuffer sb = new StringBuffer();
593 if(time<0) {
594 sb.append("-");
595 }
596 long absTime = Math.abs(time);
597 if(!toMs && absTime < 1000) {
598 return "0s";
599 }
600 if(absTime > DAY_IN_MS) {
601
602 sb.append(absTime / DAY_IN_MS + "d");
603 absTime = absTime % DAY_IN_MS;
604 }
605 if (absTime > HOUR_IN_MS) {
606
607 sb.append(absTime / HOUR_IN_MS + "h");
608 absTime = absTime % HOUR_IN_MS;
609 }
610 if (absTime > 60000) {
611 sb.append(absTime / 60000 + "m");
612 absTime = absTime % 60000;
613 }
614 if (absTime > 1000) {
615 sb.append(absTime / 1000 + "s");
616 absTime = absTime % 1000;
617 }
618 if(toMs) {
619 sb.append(absTime + "ms");
620 }
621 return sb.toString();
622 }
623
624
625 /***
626 * Generate a long UID based on the given class and version number.
627 * Using this instead of the default will assume serialization
628 * compatibility across class changes unless version number is
629 * intentionally bumped.
630 *
631 * @param class1
632 * @param version
633 * @return UID based off class and version number.
634 */
635 public static long classnameBasedUID(Class class1, int version) {
636 String callingClassname = class1.getName();
637 return (long)callingClassname.hashCode() << 32 + version;
638 }
639
640 /***
641 * Copy the raw bytes of a long into a byte array, starting at
642 * the specified offset.
643 *
644 * @param l
645 * @param array
646 * @param offset
647 */
648 public static void longIntoByteArray(long l, byte[] array, int offset) {
649 int i, shift;
650
651 for(i = 0, shift = 56; i < 8; i++, shift -= 8)
652 array[offset+i] = (byte)(0xFF & (l >> shift));
653 }
654
655 public static long byteArrayIntoLong(byte [] bytearray) {
656 return byteArrayIntoLong(bytearray, 0);
657 }
658
659 /***
660 * Byte array into long.
661 * @param bytearray Array to convert to a long.
662 * @param offset Offset into array at which we start decoding the long.
663 * @return Long made of the bytes of <code>array</code> beginning at
664 * offset <code>offset</code>.
665 * @see #longIntoByteArray(long, byte[], int)
666 */
667 public static long byteArrayIntoLong(byte [] bytearray,
668 int offset) {
669 long result = 0;
670 for (int i = offset; i < 8
671 result = (result << 8
672 (0xff & (byte)(bytearray[i] & 0xff));
673 }
674 return result;
675 }
676
677 /***
678 * Given a string that may be a plain host or host/path (without
679 * URI scheme), add an implied http:// if necessary.
680 *
681 * @param u string to evaluate
682 * @return string with http:// added if no scheme already present
683 */
684 public static String addImpliedHttpIfNecessary(String u) {
685 if(u.indexOf(':') == -1 || u.indexOf('.') < u.indexOf(':')) {
686
687 u = "http://" + u;
688 }
689 return u;
690 }
691
692 /***
693 * Verify that the array begins with the prefix.
694 *
695 * @param array
696 * @param prefix
697 * @return true if array is identical to prefix for the first prefix.length
698 * positions
699 */
700 public static boolean startsWith(byte[] array, byte[] prefix) {
701 if(prefix.length>array.length) {
702 return false;
703 }
704 for(int i = 0; i < prefix.length; i++) {
705 if(array[i]!=prefix[i]) {
706 return false;
707 }
708 }
709 return true;
710 }
711
712 /***
713 * Utility method to get a String singleLineReport from Reporter
714 * @param rep Reporter to get singleLineReport from
715 * @return String of report
716 */
717 public static String singleLineReport(Reporter rep) {
718 StringWriter sw = new StringWriter();
719 PrintWriter pw = new PrintWriter(sw);
720 try {
721 rep.singleLineReportTo(pw);
722 } catch (IOException e) {
723
724 e.printStackTrace();
725 }
726 pw.flush();
727 return sw.toString();
728 }
729
730 /***
731 * Compose the requested report into a String. DANGEROUS IF REPORT
732 * CAN BE LARGE.
733 *
734 * @param rep Reported
735 * @param name String name of report to compose
736 * @return String of report
737 */
738 public static String writeReportToString(Reporter rep, String name) {
739 StringWriter sw = new StringWriter();
740 PrintWriter pw = new PrintWriter(sw);
741 rep.reportTo(name,pw);
742 pw.flush();
743 return sw.toString();
744 }
745
/**
 * Known top-level domains, stored in upper case. Populated once by the
 * static initializer below.
 */
public static Set<String> TLDS;

static {
    TLDS = new HashSet<String>();
    String[] tldsArray = { "AC", "AD", "AE", "AERO", "AF", "AG", "AI",
        "AL", "AM", "AN", "AO", "AQ", "AR", "ARPA", "AS", "ASIA", "AT",
        "AU", "AW", "AX", "AZ", "BA", "BB", "BD", "BE", "BF", "BG",
        "BH", "BI", "BIZ", "BJ", "BM", "BN", "BO", "BR", "BS", "BT",
        "BV", "BW", "BY", "BZ", "CA", "CAT", "CC", "CD", "CF", "CG",
        "CH", "CI", "CK", "CL", "CM", "CN", "CO", "COM", "COOP", "CR",
        "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",
        "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", "EU", "FI",
        "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF",
        "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR",
        "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT",
        "HU", "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ",
        "IR", "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG",
        "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
        "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",
        "MA", "MC", "MD", "ME", "MG", "MH", "MIL", "MK", "ML", "MM",
        "MN", "MO", "MOBI", "MP", "MQ", "MR", "MS", "MT", "MU",
        "MUSEUM", "MV", "MW", "MX", "MY", "MZ", "NA", "NAME", "NC",
        "NE", "NET", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU",
        "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", "PH", "PK", "PL",
        "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", "QA", "RE",
        "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG",
        "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST",
        "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH",
        "TJ", "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT",
        "TV", "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA",
        "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D",
        "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A",
        "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA",
        "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV",
        "XN--ZCKZAH", "YE", "YT", "YU", "ZA", "ZM", "ZW" };
    TLDS.addAll(Arrays.asList(tldsArray));
}

/**
 * Return whether the given string represents a known
 * top-level-domain (like "com", "org", etc.) per IANA
 * as of 2008071601.
 *
 * <p>The comparison ignores case. Upper-casing is done with a fixed
 * English locale so that the result does not depend on the JVM's
 * default locale (under a Turkish default locale, for instance,
 * "info" would otherwise be upper-cased with a dotted capital I and
 * not be found).
 *
 * @param dom candidate string; must not be null
 * @return boolean true if recognized as TLD
 */
public static boolean isTld(String dom) {
    return TLDS.contains(dom.toUpperCase(Locale.ENGLISH));
}
796 }
797