/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There does not seem to be any documentation hinting at the encoding
 * of string values in the format.  Given that the main purpose of
 * dump/restore is backing up a system, it seems very likely the format
 * uses the current default encoding of the system.
 *
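 * <p>A minimal usage sketch (the archive name is illustrative):</p>
 * <pre>
 * InputStream is = new FileInputStream("backup.dump");
 * DumpArchiveInputStream dump = new DumpArchiveInputStream(is);
 * DumpArchiveEntry entry;
 * while ((entry = dump.getNextDumpEntry()) != null) {
 *     // inspect entry.getName() etc.; contents are available via read()
 * }
 * dump.close();
 * </pre>
 *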
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @throws ArchiveException on error
     * @since 1.6
     */
    public DumpArchiveInputStream(InputStream is, String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our block buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

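        // inode 2 is, by long-standing UFS convention, the root directory,
        // and the root is recorded as its own parent.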
        // put in a dummy record for the root node.
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure parent directories are released
        // first: a directory's path is a proper prefix of its children's
        // paths, so it sorts ahead of them.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     *
     * @return the summary record read from the archive header
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry
     * @throws IOException on error
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

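            // ADDR segments are continuation headers for the previous
            // file's data, so everything up to the next non-ADDR header
            // still belongs to that file and can be skipped wholesale.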
            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

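            // mark the record buffer as fully consumed so that read()
            // fetches a fresh record before returning any data.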
            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

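            // each record mirrors the BSD on-disk dirent layout: a 32-bit
            // inode number, a 16-bit record length, an 8-bit type, an 8-bit
            // name length, then the name bytes themselves.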
            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                // a zero record length would never advance the loop; treat
                // it as a corrupt archive rather than spinning forever.
                if (reclen == 0) {
                    throw new InvalidFormatException();
                }

                byte type = blockBuffer[i + 6];

                String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get the full path for the specified archive entry, or null if there's a gap.
     *
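     * <p>For example, a hypothetical entry for {@code passwd} whose dirent
     * chain runs passwd, etc, root (".") resolves to
     * {@code ./etc/passwd}.</p>
     *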
     * @param entry the entry to resolve
     * @return full path for the specified archive entry, or null if there's a gap.
     */
    private String getPath(DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value, and if so we defer
        // this entry until the gap has been filled in.
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
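                // 512 is apparently the maximum number of records a single
                // segment header can describe (TP_NINDIR in BSD dump), so
                // past that point we have to read the next header.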
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     *
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }
}