1   /* WriterPoolProcessor
2    *
3    * $Id: WriterPoolProcessor.java 6013 2008-10-28 04:27:08Z gojomo $
4    *
5    * Created on July 19th, 2006
6    *
7    * Copyright (C) 2006 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.framework;
26  
27  import java.io.DataInputStream;
28  import java.io.DataOutputStream;
29  import java.io.File;
30  import java.io.FileInputStream;
31  import java.io.FileNotFoundException;
32  import java.io.FileOutputStream;
33  import java.io.IOException;
34  import java.io.ObjectInputStream;
35  import java.io.StringWriter;
36  import java.net.InetAddress;
37  import java.net.UnknownHostException;
38  import java.util.ArrayList;
39  import java.util.Arrays;
40  import java.util.Iterator;
41  import java.util.List;
42  import java.util.concurrent.atomic.AtomicInteger;
43  import java.util.logging.Logger;
44  
45  import javax.management.AttributeNotFoundException;
46  import javax.management.MBeanException;
47  import javax.management.ReflectionException;
48  import javax.xml.transform.SourceLocator;
49  import javax.xml.transform.Templates;
50  import javax.xml.transform.Transformer;
51  import javax.xml.transform.TransformerConfigurationException;
52  import javax.xml.transform.TransformerException;
53  import javax.xml.transform.TransformerFactory;
54  import javax.xml.transform.stream.StreamResult;
55  import javax.xml.transform.stream.StreamSource;
56  
57  import org.archive.crawler.Heritrix;
58  import org.archive.crawler.datamodel.CoreAttributeConstants;
59  import org.archive.crawler.datamodel.CrawlHost;
60  import org.archive.crawler.datamodel.CrawlOrder;
61  import org.archive.crawler.datamodel.CrawlURI;
62  import org.archive.crawler.datamodel.FetchStatusCodes;
63  import org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule;
64  import org.archive.crawler.event.CrawlStatusListener;
65  import org.archive.crawler.settings.SimpleType;
66  import org.archive.crawler.settings.StringList;
67  import org.archive.crawler.settings.Type;
68  import org.archive.crawler.settings.XMLSettingsHandler;
69  import org.archive.io.ObjectPlusFilesInputStream;
70  import org.archive.io.WriterPool;
71  import org.archive.io.WriterPoolMember;
72  
73  /***
74   * Abstract implementation of a file pool processor.
75   * Subclass to implement for a particular {@link WriterPoolMember} instance.
76   * @author Parker Thompson
77   * @author stack
78   */
public abstract class WriterPoolProcessor extends Processor
implements CoreAttributeConstants, CrawlStatusListener, FetchStatusCodes {
    // Logger keyed on the concrete subclass name so ARC/WARC variants log
    // under their own category.
    private final Logger logger = Logger.getLogger(this.getClass().getName());

    /***
     * Key to use asking settings for file compression value.
     */
    public static final String ATTR_COMPRESS = "compress";

    /***
     * Default as to whether we do compression of files.
     */
    public static final boolean DEFAULT_COMPRESS = true;

    /***
     * Key to use asking settings for file prefix value.
     */
    public static final String ATTR_PREFIX = "prefix";    

    /***
     * Key to use asking settings for arc path value.
     */
    public static final String ATTR_PATH ="path";

    /***
     * Key to use asking settings for file suffix value.
     */
    public static final String ATTR_SUFFIX = "suffix";

    /***
     * Key to use asking settings for file max size value.
     */
    public static final String ATTR_MAX_SIZE_BYTES = "max-size-bytes";
    
    /***
     * Key to get maximum pool size.
     *
     * This key is for maximum files active in the pool.
     */
    public static final String ATTR_POOL_MAX_ACTIVE = "pool-max-active";

    /***
     * Key to get maximum wait on pool object before we give up and
     * throw IOException.
     */
    public static final String ATTR_POOL_MAX_WAIT = "pool-max-wait";

    /***
     * Key for the maximum bytes to write attribute.
     */
    public static final String ATTR_MAX_BYTES_WRITTEN =
        "total-bytes-to-write";
    
    /***
     * Key for whether to skip writing records of content-digest repeats.
     */
    public static final String ATTR_SKIP_IDENTICAL_DIGESTS =
        "skip-identical-digests";
    
    /***
     * CrawlURI annotation indicating no record was written.
     */
    protected static final String ANNOTATION_UNWRITTEN = "unwritten";
    
    /***
     * Default maximum file size.
     * TODO: Check that subclasses can set a different MAX_FILE_SIZE and
     * it will be used in the constructor as default.
     */
    private static final int DEFAULT_MAX_FILE_SIZE = 100000000;
    
    /***
     * Default path list.
     * 
     * TODO: Confirm this one gets picked up.
     */
    private static final String [] DEFAULT_PATH = {"crawl-store"};

    /***
     * Reference to pool.  Transient: the pool is recreated via
     * {@link #setupPool} after deserialization (see readObject) rather
     * than being serialized.
     */
    transient private WriterPool pool = null;
    
    /***
     * Total number of bytes written to disc.
     * NOTE(review): updated via setTotalBytesWritten without
     * synchronization — confirm single-writer update or tolerate an
     * approximate total.
     */
    private long totalBytesWritten = 0;
    
    /***
     * Calculate metadata once only (lazily cached by getMetadata()).
     */
    transient private List<String> cachedMetadata = null;
172 
173     /***
174      * @param name Name of this processor.
175      */
176     public WriterPoolProcessor(String name) {
177     	this(name, "Pool of files processor");
178     }
179     	
180     /***
181      * @param name Name of this processor.
182      * @param description Description for this processor.
183      */
184     public WriterPoolProcessor(final String name,
185         		final String description) {
186         super(name, description);
187         Type e = addElementToDefinition(
188             new SimpleType(ATTR_COMPRESS, "Compress files when " +
189             	"writing to disk.", new Boolean(DEFAULT_COMPRESS)));
190         e.setOverrideable(false);
191         e = addElementToDefinition(
192             new SimpleType(ATTR_PREFIX, 
193                 "File prefix. " +
194                 "The text supplied here will be used as a prefix naming " +
195                 "writer files.  For example if the prefix is 'IAH', " +
196                 "then file names will look like " +
197                 "IAH-20040808101010-0001-HOSTNAME.arc.gz " +
198                 "...if writing ARCs (The prefix will be " +
199                 "separated from the date by a hyphen).",
200                 WriterPoolMember.DEFAULT_PREFIX));
201         e = addElementToDefinition(
202             new SimpleType(ATTR_SUFFIX, "Suffix to tag onto " +
203                 "files. '${HOSTNAME}' in the suffix will be " +
204                 "replaced with the local hostname. If empty, " +
205                 "no suffix will be added.",
206                 WriterPoolMember.DEFAULT_SUFFIX));
207         e.setOverrideable(false);
208         e = addElementToDefinition(
209             new SimpleType(ATTR_MAX_SIZE_BYTES, "Max size of each file",
210                 new Long(DEFAULT_MAX_FILE_SIZE)));
211         e.setOverrideable(false);
212         e = addElementToDefinition(
213             new StringList(ATTR_PATH, "Where to files. " +
214                 "Supply absolute or relative path.  If relative, files " +
215                 "will be written relative to " +
216                 "the " + CrawlOrder.ATTR_DISK_PATH + "setting." +
217                 " If more than one path specified, we'll round-robin" +
218                 " dropping files to each.  This setting is safe" +
219                 " to change midcrawl (You can remove and add new dirs" +
220                 " as the crawler progresses).", getDefaultPath()));
221         e.setOverrideable(false);
222         e = addElementToDefinition(new SimpleType(ATTR_POOL_MAX_ACTIVE,
223             "Maximum active files in pool. " +
224             "This setting cannot be varied over the life of a crawl.",
225             new Integer(WriterPool.DEFAULT_MAX_ACTIVE)));
226         e.setOverrideable(false);
227         e = addElementToDefinition(new SimpleType(ATTR_POOL_MAX_WAIT,
228             "Maximum time to wait on pool element" +
229             " (milliseconds). This setting cannot be varied over the life" +
230             " of a crawl.",
231             new Integer(WriterPool.DEFAULT_MAXIMUM_WAIT)));
232         e.setOverrideable(false);
233         e = addElementToDefinition(new SimpleType(ATTR_MAX_BYTES_WRITTEN,
234             "Total file bytes to write to disk." +
235             " Once the size of all files on disk has exceeded this " +
236             "limit, this processor will stop the crawler. " +
237             "A value of zero means no upper limit.", new Long(0)));
238         e.setOverrideable(false);
239         e.setExpertSetting(true);
240         e = addElementToDefinition(new SimpleType(ATTR_SKIP_IDENTICAL_DIGESTS,
241                 "Whether to skip the writing of a record when URI " +
242                 "history information is available and indicates the " +
243                 "prior fetch had an identical content digest. " +
244                 "Default is false.", new Boolean(false)));
245         e.setOverrideable(true);
246         e.setExpertSetting(true);
247     }
248     
    /***
     * @return Default output path list used when the operator configures
     * none; subclasses may override to supply a different default.
     */
    protected String [] getDefaultPath() {
    	return DEFAULT_PATH;
	}
252 
253     public synchronized void initialTasks() {
254         // Add this class to crawl state listeners and setup pool.
255         getSettingsHandler().getOrder().getController().
256             addCrawlStatusListener(this);
257         setupPool(new AtomicInteger());
258         // Run checkpoint recovery code.
259         if (getSettingsHandler().getOrder().getController().
260         		isCheckpointRecover()) {
261         	checkpointRecover();
262         }
263     }
264     
265     protected AtomicInteger getSerialNo() {
266         return ((WriterPool)getPool()).getSerialNo();
267     }
268 
269     /***
270      * Set up pool of files.
271      */
272     protected abstract void setupPool(final AtomicInteger serialNo);
273 
274     /***
275      * Writes a CrawlURI and its associated data to store file.
276      *
277      * Currently this method understands the following uri types: dns, http, 
278      * and https.
279      *
280      * @param curi CrawlURI to process.
281      */
282     protected abstract void innerProcess(CrawlURI curi);
283     
284     protected void checkBytesWritten() {
285         long max = getMaxToWrite();
286         if (max <= 0) {
287             return;
288         }
289         if (max <= this.totalBytesWritten) {
290             getController().requestCrawlStop("Finished - Maximum bytes (" +
291                 Long.toString(max) + ") written");
292         }
293     }
294     
295     /***
296      * Whether the given CrawlURI should be written to archive files. 
297      * Annotates CrawlURI with a reason for any negative answer. 
298      * 
299      * @param curi CrawlURI
300      * @return true if URI should be written; false otherwise
301      */
302     protected boolean shouldWrite(CrawlURI curi) {
303         // check for duplicate content write suppression
304         if(((Boolean)getUncheckedAttribute(curi, ATTR_SKIP_IDENTICAL_DIGESTS)) 
305             && IdenticalDigestDecideRule.hasIdenticalDigest(curi)) {
306             curi.addAnnotation(ANNOTATION_UNWRITTEN + ":identicalDigest");
307             return false; 
308         }
309         String scheme = curi.getUURI().getScheme().toLowerCase();
310         // TODO: possibly move this sort of isSuccess() test into CrawlURI
311         boolean retVal; 
312         if (scheme.equals("dns")) {
313             retVal = curi.getFetchStatus() == S_DNS_SUCCESS;
314         } else if (scheme.equals("http") || scheme.equals("https")) {
315             retVal = curi.getFetchStatus() > 0 && curi.isHttpTransaction();
316         } else if (scheme.equals("ftp")) {
317             retVal = curi.getFetchStatus() == 200;
318         } else {
319             // unsupported scheme
320             curi.addAnnotation(ANNOTATION_UNWRITTEN + ":scheme");
321             return false; 
322         }
323         if (retVal == false) {
324             // status not deserving writing
325             curi.addAnnotation(ANNOTATION_UNWRITTEN + ":status");
326             return false; 
327         }
328         return true; 
329     }
330     
331     /***
332      * Return IP address of given URI suitable for recording (as in a
333      * classic ARC 5-field header line).
334      * 
335      * @param curi CrawlURI
336      * @return String of IP address
337      */
338     protected String getHostAddress(CrawlURI curi) {
339         // special handling for DNS URIs: want address of DNS server
340         if(curi.getUURI().getScheme().toLowerCase().equals("dns")) {
341             return curi.getString(A_DNS_SERVER_IP_LABEL);
342         }
343         // otherwise, host referenced in URI
344         CrawlHost h = getController().getServerCache().getHostFor(curi);
345         if (h == null) {
346             throw new NullPointerException("Crawlhost is null for " +
347                 curi + " " + curi.getVia());
348         }
349         InetAddress a = h.getIP();
350         if (a == null) {
351             throw new NullPointerException("Address is null for " +
352                 curi + " " + curi.getVia() + ". Address " +
353                 ((h.getIpFetched() == CrawlHost.IP_NEVER_LOOKED_UP)?
354                      "was never looked up.":
355                      (System.currentTimeMillis() - h.getIpFetched()) +
356                          " ms ago."));
357         }
358         return h.getIP().getHostAddress();
359     }
360     
361     /***
362      * Version of getAttributes that catches and logs exceptions
363      * and returns null if failure to fetch the attribute.
364      * @param name Attribute name.
365      * @return Attribute or null.
366      */
367     public Object getAttributeUnchecked(String name) {
368         Object result = null;
369         try {
370             result = super.getAttribute(name);
371         } catch (AttributeNotFoundException e) {
372             logger.warning(e.getLocalizedMessage());
373         } catch (MBeanException e) {
374             logger.warning(e.getLocalizedMessage());
375         } catch (ReflectionException e) {
376             logger.warning(e.getLocalizedMessage());
377         }
378         return result;
379     }
380 
381    /***
382     * Max size we want files to be (bytes).
383     *
384     * Default is ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE.  Note that ARC
385     * files will usually be bigger than maxSize; they'll be maxSize + length
386     * to next boundary.
387     * @return ARC maximum size.
388     */
389     public long getMaxSize() {
390         Object obj = getAttributeUnchecked(ATTR_MAX_SIZE_BYTES);
391         return (obj == null)? DEFAULT_MAX_FILE_SIZE: ((Long)obj).longValue();
392     }
393 
394     public String getPrefix() {
395         Object obj = getAttributeUnchecked(ATTR_PREFIX);
396         return (obj == null)? WriterPoolMember.DEFAULT_PREFIX: (String)obj;
397     }
398 
399     public List<File> getOutputDirs() {
400         Object obj = getAttributeUnchecked(ATTR_PATH);
401         List list = (obj == null)? Arrays.asList(DEFAULT_PATH): (StringList)obj;
402         ArrayList<File> results = new ArrayList<File>();
403         for (Iterator i = list.iterator(); i.hasNext();) {
404             String path = (String)i.next();
405             File f = new File(path);
406             if (!f.isAbsolute()) {
407                 f = new File(getController().getDisk(), path);
408             }
409             if (!f.exists()) {
410                 try {
411                     f.mkdirs();
412                 } catch (Exception e) {
413                     e.printStackTrace();
414                     continue;
415                 }
416             }
417             results.add(f);
418         }
419         return results;
420     }
421     
422     public boolean isCompressed() {
423         Object obj = getAttributeUnchecked(ATTR_COMPRESS);
424         return (obj == null)? DEFAULT_COMPRESS:
425             ((Boolean)obj).booleanValue();
426     }
427 
428     /***
429      * @return Returns the poolMaximumActive.
430      */
431     public int getPoolMaximumActive() {
432         Object obj = getAttributeUnchecked(ATTR_POOL_MAX_ACTIVE);
433         return (obj == null)? WriterPool.DEFAULT_MAX_ACTIVE:
434             ((Integer)obj).intValue();
435     }
436 
437     /***
438      * @return Returns the poolMaximumWait.
439      */
440     public int getPoolMaximumWait() {
441         Object obj = getAttributeUnchecked(ATTR_POOL_MAX_WAIT);
442         return (obj == null)? WriterPool.DEFAULT_MAXIMUM_WAIT:
443             ((Integer)obj).intValue();
444     }
445 
446     public String getSuffix() {
447         Object obj = getAttributeUnchecked(ATTR_SUFFIX);
448         String sfx = (obj == null)?
449             WriterPoolMember.DEFAULT_SUFFIX: (String)obj;
450         sfx = sfx.trim(); 
451         if (sfx.contains(WriterPoolMember.HOSTNAME_VARIABLE)) {
452             String str = "localhost.localdomain";
453             try {
454                 str = InetAddress.getLocalHost().getHostName();
455             } catch (UnknownHostException ue) {
456                 logger.severe("Failed getHostAddress for this host: " + ue);
457             }
458             sfx = sfx.replace(WriterPoolMember.HOSTNAME_VARIABLE, str);
459         }
460         return sfx;
461     }
462     
463     public long getMaxToWrite() {
464         Object obj = getAttributeUnchecked(ATTR_MAX_BYTES_WRITTEN);
465         return (obj == null)? 0: ((Long)obj).longValue();
466     }
467 
	/***
	 * Closes the writer pool as the crawl ends so open files are
	 * flushed and released.
	 * NOTE(review): assumes initialTasks() has run so pool is non-null
	 * — confirm listener cannot fire before setupPool().
	 */
	public void crawlEnding(String sExitMessage) {
		this.pool.close();
	}
471 
	/***
	 * No-op: the pool was already closed in crawlEnding().
	 */
	public void crawlEnded(String sExitMessage) {
        // sExitMessage is unused.
	}

    /* (non-Javadoc)
     * @see org.archive.crawler.event.CrawlStatusListener#crawlStarted(java.lang.String)
     */
    public void crawlStarted(String message) {
        // No-op; nothing to do at crawl start (pool is built in initialTasks).
    }
482     
483     protected String getCheckpointStateFile() {
484     	return this.getClass().getName() + ".state";
485     }
486     
    /***
     * Checkpoints this processor: persists the writer serial number into
     * the checkpoint directory, closes every file in the pool, then
     * reopens the pool seeded with the saved serial number.
     *
     * @param checkpointDir Directory to write checkpoint state into.
     * @throws IOException If the serial-number state file cannot be written.
     */
    public void crawlCheckpoint(File checkpointDir) throws IOException {
        int serial = getSerialNo().get();
        if (this.pool.getNumActive() > 0) {
            // If we have open active Archive files, up the serial number
            // so after checkpoint, we start at one past current number and
            // so the number we serialize, is one past current serialNo.
            // All this serial number manipulation should be fine in here since
            // we're paused checkpointing (Revisit if this assumption changes).
            serial = getSerialNo().incrementAndGet();
        }
        saveCheckpointSerialNumber(checkpointDir, serial);
        // Close all ARCs on checkpoint.
        try {
            this.pool.close();
        } finally {
            // Reopen on checkpoint.
            setupPool(new AtomicInteger(serial));
        }
    }
506     
	/***
	 * No-op: nothing to do while the crawl is pausing.
	 */
	public void crawlPausing(String statusMessage) {
        // statusMessage is unused.
	}

	/***
	 * No-op: nothing to do once the crawl is paused.
	 */
	public void crawlPaused(String statusMessage) {
        // statusMessage is unused.
	}

	/***
	 * No-op: nothing to do when the crawl resumes.
	 */
	public void crawlResuming(String statusMessage) {
        // statusMessage is unused.
	}
518 	
    /***
     * Custom deserialization: after the default field restore, registers
     * a finish task that recreates the transient writer pool.
     * NOTE(review): the unconditional cast assumes the stream is an
     * ObjectPlusFilesInputStream; a plain ObjectInputStream would throw
     * ClassCastException — confirm all deserialization paths use it.
     *
     * @param stream Stream being read from.
     * @throws IOException On read failure.
     * @throws ClassNotFoundException On class-resolution failure.
     */
    private void readObject(ObjectInputStream stream)
    throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        ObjectPlusFilesInputStream coistream =
            (ObjectPlusFilesInputStream)stream;
        coistream.registerFinishTask( new Runnable() {
            public void run() {
            	setupPool(new AtomicInteger());
            }
        });
    }
530 
	/*** @return The writer pool (null until {@link #setupPool} has run). */
	protected WriterPool getPool() {
		return pool;
	}

	/*** @param pool Writer pool for this processor to use. */
	protected void setPool(WriterPool pool) {
		this.pool = pool;
	}

	/*** @return Running total of bytes written to disc by this processor. */
	protected long getTotalBytesWritten() {
		return totalBytesWritten;
	}

	/*** @param totalBytesWritten New running total of bytes written. */
	protected void setTotalBytesWritten(long totalBytesWritten) {
        this.totalBytesWritten = totalBytesWritten;
    }
546 	
547     /***
548      * Called out of {@link #initialTasks()} when recovering a checkpoint.
549      * Restore state.
550      */
551     protected void checkpointRecover() {
552         int serialNo = loadCheckpointSerialNumber();
553         if (serialNo != -1) {
554             getSerialNo().set(serialNo);
555         }
556     }
557 
558     /***
559      * @return Serial number from checkpoint state file or if unreadable, -1
560      * (Client should check for -1).
561      */
562     protected int loadCheckpointSerialNumber() {
563         int result = -1;
564         
565         // If in recover mode, read in the Writer serial number saved
566         // off when we checkpointed.
567         File stateFile = new File(getSettingsHandler().getOrder()
568                 .getController().getCheckpointRecover().getDirectory(),
569                 getCheckpointStateFile());
570         if (!stateFile.exists()) {
571             logger.info(stateFile.getAbsolutePath()
572                     + " doesn't exist so cannot restore Writer serial number.");
573         } else {
574             DataInputStream dis = null;
575             try {
576                 dis = new DataInputStream(new FileInputStream(stateFile));
577                 result = dis.readShort();
578             } catch (FileNotFoundException e) {
579                 e.printStackTrace();
580             } catch (IOException e) {
581                 e.printStackTrace();
582             } finally {
583                 try {
584                     if (dis != null) {
585                         dis.close();
586                     }
587                 } catch (IOException e) {
588                     e.printStackTrace();
589                 }
590             }
591         }
592         return result;
593     }
594     
    /***
     * Persists the given writer serial number into the checkpoint
     * directory so it can be restored on recovery.
     * NOTE(review): value is written with writeShort, so serial numbers
     * outside the short range are truncated — confirm acceptable.
     *
     * @param checkpointDir Directory to write the state file into.
     * @param serialNo Serial number to persist.
     * @throws IOException On failure writing the state file.
     */
    protected void saveCheckpointSerialNumber(final File checkpointDir,
            final int serialNo)
    throws IOException {
        // Write out the current state of the ARCWriter serial number.
        File f = new File(checkpointDir, getCheckpointStateFile());
        DataOutputStream dos = new DataOutputStream(new FileOutputStream(f));
        try {
            dos.writeShort(serialNo);
        } finally {
            dos.close();
        }
    }
607     
608     /***
609      * Return list of metadatas to add to first arc file metadata record.
610      * 
611      * Default is to stylesheet the order file.  To specify stylesheet,
612      * override {@link #getFirstrecordStylesheet()}.
613      *
614      * Get xml files from settingshandler.  Currently order file is the
615      * only xml file.  We're NOT adding seeds to meta data.
616      *
617      * @return List of strings and/or files to add to arc file as metadata or
618      * null.
619      */
620     public synchronized List<String> getMetadata() {
621         if (this.cachedMetadata != null) {
622             return this.cachedMetadata;
623         }
624         return cacheMetadata();
625     }
626     
627     protected synchronized List<String> cacheMetadata() {
628         
629         // If no stylesheet, return empty metadata.
630         if (getFirstrecordStylesheet() == null ||
631                 getFirstrecordStylesheet().length() == 0) {
632             this.cachedMetadata = new ArrayList<String>(1);
633             this.cachedMetadata.add("");
634             return this.cachedMetadata;
635         }
636         
637         List<String> result = null;
638         if (!XMLSettingsHandler.class.isInstance(getSettingsHandler())) {
639             logger.warning("Expected xml settings handler (No warcinfo).");
640             // Early return
641             return result;
642         }
643         
644         XMLSettingsHandler xsh = (XMLSettingsHandler)getSettingsHandler();
645         File orderFile = xsh.getOrderFile();
646         if (!orderFile.exists() || !orderFile.canRead()) {
647                 logger.severe("File " + orderFile.getAbsolutePath() +
648                     " is does not exist or is not readable.");
649         } else {
650             result = new ArrayList<String>(1);
651             result.add(getFirstrecordBody(orderFile));
652         }
653         this.cachedMetadata = result;
654         return this.cachedMetadata;
655     }
656     
657     /***
658      * @preturn Full path to stylesheet (Its read off the CLASSPATH
659      * as resource).
660      */
661     protected String getFirstrecordStylesheet() {
662         return null;
663     }
664 
665     /***
666      * Write the arc metadata body content.
667      *
668      * Its based on the order xml file but into this base we'll add other info
669      * such as machine ip.
670      *
671      * @param orderFile Order file.
672 
673      *
674      * @return String that holds the arc metaheader body.
675      */
676     protected String getFirstrecordBody(File orderFile) {
677         String result = null;
678         TransformerFactory factory = TransformerFactory.newInstance();
679         Templates templates = null;
680         Transformer xformer = null;
681         try {
682             templates = factory.newTemplates(new StreamSource(
683                 this.getClass().getResourceAsStream(getFirstrecordStylesheet())));
684             xformer = templates.newTransformer();
685             // Below parameter names must match what is in the stylesheet.
686             xformer.setParameter("software", "Heritrix " +
687                 Heritrix.getVersion() + " http://crawler.archive.org");
688             xformer.setParameter("ip",
689                 InetAddress.getLocalHost().getHostAddress());
690             xformer.setParameter("hostname",
691                 InetAddress.getLocalHost().getHostName());
692             StreamSource source = new StreamSource(
693                 new FileInputStream(orderFile));
694             StringWriter writer = new StringWriter();
695             StreamResult target = new StreamResult(writer);
696             xformer.transform(source, target);
697             result= writer.toString();
698         } catch (TransformerConfigurationException e) {
699             logger.severe("Failed transform " + e);
700         } catch (FileNotFoundException e) {
701             logger.severe("Failed transform, file not found " + e);
702         } catch (UnknownHostException e) {
703             logger.severe("Failed transform, unknown host " + e);
704         } catch(TransformerException e) {
705             SourceLocator locator = e.getLocator();
706             int col = locator.getColumnNumber();
707             int line = locator.getLineNumber();
708             String publicId = locator.getPublicId();
709             String systemId = locator.getSystemId();
710             logger.severe("Transform error " + e + ", col " + col + ", line " +
711                 line + ", publicId " + publicId + ", systemId " + systemId);
712         }
713 
714         return result;
715     }
716 }