001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.utils.IOUtils;
037
038import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
039import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
040import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
041import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
042
043/**
044 * Implements an input stream that can read Zip archives.
045 *
046 * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
047 * DEFLATE algorithm is used, as the size information is not available
048 * from the header.</p>
049 *
050 * <p>The {@link ZipFile} class is preferred when reading from files.</p>
051 *
052 * <p>As of Apache Commons Compress it transparently supports Zip64
053 * extensions and thus individual entries and archives larger than 4
054 * GB or with more than 65536 entries.</p>
055 *
056 * @see ZipFile
057 * @NotThreadSafe
058 */
059public class ZipArchiveInputStream extends ArchiveInputStream {
060
    /** The zip encoding to use for filenames and the file comment. */
    private final ZipEncoding zipEncoding;

    // The encoding name exactly as passed to the constructor.  Package
    // private (no modifier) so that unit tests can inspect it.
    final String encoding;

    /** Whether to look for and use Unicode extra fields. */
    private final boolean useUnicodeExtraFields;

    /** Wrapped stream, will always be a PushbackInputStream. */
    private final InputStream in;

    /**
     * Inflater used for all deflated entries; it is reset between
     * entries and released when the stream is closed.
     */
    private final Inflater inf = new Inflater(true);

    /**
     * Buffer used to read from the wrapped stream.  Its capacity is
     * also used as the push-back capacity of the wrapped stream.
     */
    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);

    /** The entry that is currently being read, null between entries. */
    private CurrentEntry current = null;

    /** Whether the stream has been closed. */
    private boolean closed = false;

    /** Whether the stream has reached the central directory - and thus found all entries. */
    private boolean hitCentralDirectory = false;

    /**
     * When reading a stored entry that uses the data descriptor this
     * stream has to read the full entry and caches it.  This is the
     * cache.  It is null unless such an entry has been read.
     */
    private ByteArrayInputStream lastStoredEntry = null;

    /** Whether the stream will try to read STORED entries that use a data descriptor. */
    private boolean allowStoredEntriesWithDataDescriptor = false;

    /** Length in bytes of the fixed-size part of a local file header. */
    private static final int LFH_LEN = 30;
    /*
      local file header signature     WORD
      version needed to extract       SHORT
      general purpose bit flag        SHORT
      compression method              SHORT
      last mod file time              SHORT
      last mod file date              SHORT
      crc-32                          WORD
      compressed size                 WORD
      uncompressed size               WORD
      file name length                SHORT
      extra field length              SHORT
    */

    /** Length in bytes of the fixed-size part of a central file header. */
    private static final int CFH_LEN = 46;
    /*
        central file header signature   WORD
        version made by                 SHORT
        version needed to extract       SHORT
        general purpose bit flag        SHORT
        compression method              SHORT
        last mod file time              SHORT
        last mod file date              SHORT
        crc-32                          WORD
        compressed size                 WORD
        uncompressed size               WORD
        file name length                SHORT
        extra field length              SHORT
        file comment length             SHORT
        disk number start               SHORT
        internal file attributes        SHORT
        external file attributes        WORD
        relative offset of local header WORD
    */

    /** One more than ZIP64_MAGIC; used when correcting the Inflater's byte counter. */
    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;

    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    // holds the fixed-size part of the local file header currently being parsed
    private final byte[] LFH_BUF = new byte[LFH_LEN];
    // scratch space that skip() reads into
    private final byte[] SKIP_BUF = new byte[1024];
    private final byte[] SHORT_BUF = new byte[SHORT];
    // holds the signature/CRC words of a data descriptor
    private final byte[] WORD_BUF = new byte[WORD];
    // holds the two size fields of a data descriptor
    private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];

    /** Number of entries returned by getNextZipEntry so far. */
    private int entriesRead = 0;
144
    /**
     * Creates an instance that uses UTF-8 as the encoding for file
     * names.
     *
     * <p>Delegates to the two-argument constructor with
     * {@link ZipEncodingHelper#UTF8} as encoding.</p>
     *
     * @param inputStream the stream to wrap
     */
    public ZipArchiveInputStream(InputStream inputStream) {
        this(inputStream, ZipEncodingHelper.UTF8);
    }
152
    /**
     * Creates an instance that uses the given encoding for file names
     * and that uses InfoZIP Unicode Extra Fields (if present).
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.5
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding) {
        this(inputStream, encoding, true);
    }
161
    /**
     * Creates an instance that will not try to read STORED entries
     * that use a data descriptor.
     *
     * @param inputStream the stream to wrap
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     */
    public ZipArchiveInputStream(InputStream inputStream, String encoding, boolean useUnicodeExtraFields) {
        this(inputStream, encoding, useUnicodeExtraFields, false);
    }
171
172    /**
173     * @param encoding the encoding to use for file names, use null
174     * for the platform's default encoding
175     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
176     * Extra Fields (if present) to set the file names.
177     * @param allowStoredEntriesWithDataDescriptor whether the stream
178     * will try to read STORED entries that use a data descriptor
179     * @since 1.1
180     */
181    public ZipArchiveInputStream(InputStream inputStream,
182                                 String encoding,
183                                 boolean useUnicodeExtraFields,
184                                 boolean allowStoredEntriesWithDataDescriptor) {
185        this.encoding = encoding;
186        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
187        this.useUnicodeExtraFields = useUnicodeExtraFields;
188        in = new PushbackInputStream(inputStream, buf.capacity());
189        this.allowStoredEntriesWithDataDescriptor =
190            allowStoredEntriesWithDataDescriptor;
191        // haven't read anything so far
192        buf.limit(0);
193    }
194
    /**
     * Closes the current entry (if any), reads the next local file
     * header and returns the entry it describes.
     *
     * @return the next entry, or null if the stream has been closed,
     * the central directory has been reached, the end of the stream
     * is hit while reading the header or the bytes read don't start
     * with a local file header signature
     * @throws IOException if reading from the wrapped stream fails or
     * the archive turns out to be a split archive
     */
    public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }

        try {
            if (firstEntry) {
                // split archives have a special signature before the
                // first local file header - look for it and fail with
                // the appropriate error message if this is a split
                // archive.
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (EOFException e) {
            // no (complete) header left: treated as the end of the archive
            return null;
        }

        ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            // NOTE(review): skipRemainderOfArchive is defined outside this
            // part of the file - presumably it consumes the rest of the archive
            skipRemainderOfArchive();
        }
        if (!sig.equals(ZipLong.LFH_SIG)) {
            return null;
        }

        // off walks through the fixed-size header fields, starting right
        // behind the signature
        int off = WORD;
        current = new CurrentEntry();

        // according to the header layout documented next to LFH_LEN this
        // field is "version needed to extract"
        int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);

        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        // the UTF-8 flag of the entry overrides the configured encoding
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
        current.entry.setGeneralPurposeBit(gpFlag);

        off += SHORT;

        current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
        off += SHORT;

        // last mod time and date are read together as a single WORD
        long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
        current.entry.setTime(time);
        off += WORD;

        ZipLong size = null, cSize = null;
        if (!current.hasDataDescriptor) {
            current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
            off += WORD;

            cSize = new ZipLong(LFH_BUF, off);
            off += WORD;

            size = new ZipLong(LFH_BUF, off);
            off += WORD;
        } else {
            // CRC and sizes are taken from the data descriptor later,
            // skip the three header fields
            off += 3 * WORD;
        }

        int fileNameLen = ZipShort.getValue(LFH_BUF, off);

        off += SHORT;

        int extraLen = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;

        // the variable-length name and extra data follow the fixed-size part
        byte[] fileName = new byte[fileNameLen];
        readFully(fileName);
        current.entry.setName(entryEncoding.decode(fileName), fileName);

        byte[] extraData = new byte[extraLen];
        readFully(extraData);
        current.entry.setExtra(extraData);

        if (!hasUTF8Flag && useUnicodeExtraFields) {
            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
        }

        processZip64Extra(size, cSize);

        // UNSHRINKING and IMPLODING are decoded by dedicated streams that
        // are only set up when the compressed size is known
        if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
            if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
                current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
            } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
                current.in = new ExplodingInputStream(
                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
                        new BoundedInputStream(in, current.entry.getCompressedSize()));
            }
        }
        
        entriesRead++;
        return current.entry;
    }
299
300    /**
301     * Fills the given array with the first local file header and
302     * deals with splitting/spanning markers that may prefix the first
303     * LFH.
304     */
305    private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
306        readFully(lfh);
307        ZipLong sig = new ZipLong(lfh);
308        if (sig.equals(ZipLong.DD_SIG)) {
309            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
310        }
311
312        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
313            // The archive is not really split as only one segment was
314            // needed in the end.  Just skip over the marker.
315            byte[] missedLfhBytes = new byte[4];
316            readFully(missedLfhBytes);
317            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
318            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
319        }
320    }
321
322    /**
323     * Records whether a Zip64 extra is present and sets the size
324     * information from it if sizes are 0xFFFFFFFF and the entry
325     * doesn't use a data descriptor.
326     */
327    private void processZip64Extra(ZipLong size, ZipLong cSize) {
328        Zip64ExtendedInformationExtraField z64 =
329            (Zip64ExtendedInformationExtraField) 
330            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
331        current.usesZip64 = z64 != null;
332        if (!current.hasDataDescriptor) {
333            if (z64 != null // same as current.usesZip64 but avoids NPE warning
334                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
335                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
336                current.entry.setSize(z64.getSize().getLongValue());
337            } else {
338                current.entry.setCompressedSize(cSize.getValue());
339                current.entry.setSize(size.getValue());
340            }
341        }
342    }
343
    /**
     * Returns the next entry of the archive by delegating to
     * {@link #getNextZipEntry()}.
     *
     * @return the next entry or null if getNextZipEntry returns null
     * @throws IOException if getNextZipEntry throws it
     */
    @Override
    public ArchiveEntry getNextEntry() throws IOException {
        return getNextZipEntry();
    }
348
349    /**
350     * Whether this class is able to read the given entry.
351     *
352     * <p>May return false if it is set up to use encryption or a
353     * compression method that hasn't been implemented yet.</p>
354     * @since 1.1
355     */
356    @Override
357    public boolean canReadEntryData(ArchiveEntry ae) {
358        if (ae instanceof ZipArchiveEntry) {
359            ZipArchiveEntry ze = (ZipArchiveEntry) ae;
360            return ZipUtil.canHandleEntryData(ze)
361                && supportsDataDescriptorFor(ze);
362
363        }
364        return false;
365    }
366
367    @Override
368    public int read(byte[] buffer, int offset, int length) throws IOException {
369        if (closed) {
370            throw new IOException("The stream is closed");
371        }
372
373        if (current == null) {
374            return -1;
375        }
376
377        // avoid int overflow, check null buffer
378        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
379            throw new ArrayIndexOutOfBoundsException();
380        }
381        
382        ZipUtil.checkRequestedFeatures(current.entry);
383        if (!supportsDataDescriptorFor(current.entry)) {
384            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
385                    current.entry);
386        }
387
388        int read;
389        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
390            read = readStored(buffer, offset, length);
391        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
392            read = readDeflated(buffer, offset, length);
393        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
394                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
395            read = current.in.read(buffer, offset, length);
396        } else {
397            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
398                    current.entry);
399        }
400        
401        if (read >= 0) {
402            current.crc.update(buffer, offset, read);
403        }
404        
405        return read;
406    }
407
408    /**
409     * Implementation of read for STORED entries.
410     */
411    private int readStored(byte[] buffer, int offset, int length) throws IOException {
412
413        if (current.hasDataDescriptor) {
414            if (lastStoredEntry == null) {
415                readStoredEntry();
416            }
417            return lastStoredEntry.read(buffer, offset, length);
418        }
419
420        long csize = current.entry.getSize();
421        if (current.bytesRead >= csize) {
422            return -1;
423        }
424
425        if (buf.position() >= buf.limit()) {
426            buf.position(0);
427            int l = in.read(buf.array());
428            if (l == -1) {
429                return -1;
430            }
431            buf.limit(l);
432
433            count(l);
434            current.bytesReadFromStream += l;
435        }
436
437        int toRead = Math.min(buf.remaining(), length);
438        if ((csize - current.bytesRead) < toRead) {
439            // if it is smaller than toRead then it fits into an int
440            toRead = (int) (csize - current.bytesRead);
441        }
442        buf.get(buffer, offset, toRead);
443        current.bytesRead += toRead;
444        return toRead;
445    }
446
447    /**
448     * Implementation of read for DEFLATED entries.
449     */
450    private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
451        int read = readFromInflater(buffer, offset, length);
452        if (read <= 0) {
453            if (inf.finished()) {
454                return -1;
455            } else if (inf.needsDictionary()) {
456                throw new ZipException("This archive needs a preset dictionary"
457                                       + " which is not supported by Commons"
458                                       + " Compress.");
459            } else if (read == -1) {
460                throw new IOException("Truncated ZIP file");
461            }
462        }
463        return read;
464    }
465
466    /**
467     * Potentially reads more bytes to fill the inflater's buffer and
468     * reads from it.
469     */
470    private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
471        int read = 0;
472        do {
473            if (inf.needsInput()) {
474                int l = fill();
475                if (l > 0) {
476                    current.bytesReadFromStream += buf.limit();
477                } else if (l == -1) {
478                    return -1;
479                } else {
480                    break;
481                }
482            }
483            try {
484                read = inf.inflate(buffer, offset, length);
485            } catch (DataFormatException e) {
486                throw (IOException) new ZipException(e.getMessage()).initCause(e);
487            }
488        } while (read == 0 && inf.needsInput());
489        return read;
490    }
491
492    @Override
493    public void close() throws IOException {
494        if (!closed) {
495            closed = true;
496            in.close();
497            inf.end();
498        }
499    }
500
501    /**
502     * Skips over and discards value bytes of data from this input
503     * stream.
504     *
505     * <p>This implementation may end up skipping over some smaller
506     * number of bytes, possibly 0, if and only if it reaches the end
507     * of the underlying stream.</p>
508     *
509     * <p>The actual number of bytes skipped is returned.</p>
510     *
511     * @param value the number of bytes to be skipped.
512     * @return the actual number of bytes skipped.
513     * @throws IOException - if an I/O error occurs.
514     * @throws IllegalArgumentException - if value is negative.
515     */
516    @Override
517    public long skip(long value) throws IOException {
518        if (value >= 0) {
519            long skipped = 0;
520            while (skipped < value) {
521                long rem = value - skipped;
522                int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
523                if (x == -1) {
524                    return skipped;
525                }
526                skipped += x;
527            }
528            return skipped;
529        }
530        throw new IllegalArgumentException();
531    }
532
533    /**
534     * Checks if the signature matches what is expected for a zip file.
535     * Does not currently handle self-extracting zips which may have arbitrary
536     * leading content.
537     *
538     * @param signature the bytes to check
539     * @param length    the number of bytes to check
540     * @return true, if this stream is a zip archive stream, false otherwise
541     */
542    public static boolean matches(byte[] signature, int length) {
543        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
544            return false;
545        }
546
547        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
548            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
549            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
550            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
551    }
552
553    private static boolean checksig(byte[] signature, byte[] expected) {
554        for (int i = 0; i < expected.length; i++) {
555            if (signature[i] != expected[i]) {
556                return false;
557            }
558        }
559        return true;
560    }
561
    /**
     * Closes the current ZIP archive entry and positions the underlying
     * stream to the beginning of the next entry. All per-entry variables
     * and data structures are cleared.
     * <p>
     * If the compressed size of this entry is included in the entry header,
     * then any outstanding bytes are simply skipped from the underlying
     * stream without uncompressing them. This allows an entry to be safely
     * closed even if the compression method is unsupported.
     * <p>
     * In case we don't know the compressed size of this entry or have
     * already buffered too much data from the underlying stream to support
     * uncompression, then the uncompression process is completed and the
     * end position of the stream is adjusted based on the result of that
     * process.
     * <p>
     * Does nothing if there is no current entry.
     *
     * @throws IOException if the stream has been closed or an error occurs
     */
    private void closeEntry() throws IOException {
        if (closed) {
            throw new IOException("The stream is closed");
        }
        if (current == null) {
            return;
        }

        // Ensure all entry bytes are read
        if (current.bytesReadFromStream <= current.entry.getCompressedSize()
                && !current.hasDataDescriptor) {
            // the size is known and hasn't been exceeded: consume the
            // remaining raw bytes without decoding them
            drainCurrentEntryData();
        } else {
            // decode the rest of the entry to find out where it ends
            skip(Long.MAX_VALUE);

            // number of bytes of the wrapped stream that really belong
            // to the entry's data
            long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
                       ? getBytesInflated() : current.bytesRead;

            // this is at most a single read() operation and can't
            // exceed the range of int
            int diff = (int) (current.bytesReadFromStream - inB);

            // Pushback any required bytes
            if (diff > 0) {
                // the surplus bytes are the last ones inside the buffer
                pushback(buf.array(), buf.limit() - diff, diff);
            }
        }

        // if a stored entry has been cached, its data descriptor has
        // already been consumed while caching
        if (lastStoredEntry == null && current.hasDataDescriptor) {
            readDataDescriptor();
        }

        // reset all per-entry state
        inf.reset();
        // leaves the buffer empty (position and limit are both 0)
        buf.clear().flip();
        current = null;
        lastStoredEntry = null;
    }
617
618    /**
619     * Read all data of the current entry from the underlying stream
620     * that hasn't been read, yet.
621     */
622    private void drainCurrentEntryData() throws IOException {
623        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
624        while (remaining > 0) {
625            long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
626            if (n < 0) {
627                throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
628            } else {
629                count(n);
630                remaining -= n;
631            }
632        }
633    }
634
635    /**
636     * Get the number of bytes Inflater has actually processed.
637     *
638     * <p>for Java &lt; Java7 the getBytes* methods in
639     * Inflater/Deflater seem to return unsigned ints rather than
640     * longs that start over with 0 at 2^32.</p>
641     *
642     * <p>The stream knows how many bytes it has read, but not how
643     * many the Inflater actually consumed - it should be between the
644     * total number of bytes read for the entry and the total number
645     * minus the last read operation.  Here we just try to make the
646     * value close enough to the bytes we've read by assuming the
647     * number of bytes consumed must be smaller than (or equal to) the
648     * number of bytes read but not smaller by more than 2^32.</p>
649     */
650    private long getBytesInflated() {
651        long inB = inf.getBytesRead();
652        if (current.bytesReadFromStream >= TWO_EXP_32) {
653            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
654                inB += TWO_EXP_32;
655            }
656        }
657        return inB;
658    }
659
660    private int fill() throws IOException {
661        if (closed) {
662            throw new IOException("The stream is closed");
663        }
664        int length = in.read(buf.array());
665        if (length > 0) {
666            buf.limit(length);
667            count(buf.limit());
668            inf.setInput(buf.array(), 0, buf.limit());
669        }
670        return length;
671    }
672
673    private void readFully(byte[] b) throws IOException {
674        int count = IOUtils.readFully(in, b);
675        count(count);
676        if (count < b.length) {
677            throw new EOFException();
678        }
679    }
680
681    private void readDataDescriptor() throws IOException {
682        readFully(WORD_BUF);
683        ZipLong val = new ZipLong(WORD_BUF);
684        if (ZipLong.DD_SIG.equals(val)) {
685            // data descriptor with signature, skip sig
686            readFully(WORD_BUF);
687            val = new ZipLong(WORD_BUF);
688        }
689        current.entry.setCrc(val.getValue());
690
691        // if there is a ZIP64 extra field, sizes are eight bytes
692        // each, otherwise four bytes each.  Unfortunately some
693        // implementations - namely Java7 - use eight bytes without
694        // using a ZIP64 extra field -
695        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
696
697        // just read 16 bytes and check whether bytes nine to twelve
698        // look like one of the signatures of what could follow a data
699        // descriptor (ignoring archive decryption headers for now).
700        // If so, push back eight bytes and assume sizes are four
701        // bytes, otherwise sizes are eight bytes each.
702        readFully(TWO_DWORD_BUF);
703        ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
704        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
705            pushback(TWO_DWORD_BUF, DWORD, DWORD);
706            current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
707            current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
708        } else {
709            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
710            current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
711        }
712    }
713
714    /**
715     * Whether this entry requires a data descriptor this library can work with.
716     *
717     * @return true if allowStoredEntriesWithDataDescriptor is true,
718     * the entry doesn't require any data descriptor or the method is
719     * DEFLATED.
720     */
721    private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
722        return !entry.getGeneralPurposeBit().usesDataDescriptor()
723
724                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
725                || entry.getMethod() == ZipEntry.DEFLATED;
726    }
727
728    /**
729     * Caches a stored entry that uses the data descriptor.
730     *
731     * <ul>
732     *   <li>Reads a stored entry until the signature of a local file
733     *     header, central directory header or data descriptor has been
734     *     found.</li>
735     *   <li>Stores all entry data in lastStoredEntry.</p>
736     *   <li>Rewinds the stream to position at the data
737     *     descriptor.</li>
738     *   <li>reads the data descriptor</li>
739     * </ul>
740     *
741     * <p>After calling this method the entry should know its size,
742     * the entry's data is cached and the stream is positioned at the
743     * next local file or central directory header.</p>
744     */
745    private void readStoredEntry() throws IOException {
746        ByteArrayOutputStream bos = new ByteArrayOutputStream();
747        int off = 0;
748        boolean done = false;
749
750        // length of DD without signature
751        int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
752
753        while (!done) {
754            int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
755            if (r <= 0) {
756                // read the whole archive without ever finding a
757                // central directory
758                throw new IOException("Truncated ZIP file");
759            }
760            if (r + off < 4) {
761                // buffer too small to check for a signature, loop
762                off += r;
763                continue;
764            }
765
766            done = bufferContainsSignature(bos, off, r, ddLen);
767            if (!done) {
768                off = cacheBytesRead(bos, off, r, ddLen);
769            }
770        }
771
772        byte[] b = bos.toByteArray();
773        lastStoredEntry = new ByteArrayInputStream(b);
774    }
775
776    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
777    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
778    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
779
780    /**
781     * Checks whether the current buffer contains the signature of a
782     * &quot;data descriptor&quot;, &quot;local file header&quot; or
783     * &quot;central directory entry&quot;.
784     *
785     * <p>If it contains such a signature, reads the data descriptor
786     * and positions the stream right after the data descriptor.</p>
787     */
788    private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
789            throws IOException {
790
791        boolean done = false;
792        int readTooMuch = 0;
793        for (int i = 0; !done && i < lastRead - 4; i++) {
794            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
795                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
796                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
797                    // found a LFH or CFH:
798                    readTooMuch = offset + lastRead - i - expectedDDLen;
799                    done = true;
800                }
801                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
802                    // found DD:
803                    readTooMuch = offset + lastRead - i;
804                    done = true;
805                }
806                if (done) {
807                    // * push back bytes read in excess as well as the data
808                    //   descriptor
809                    // * copy the remaining bytes to cache
810                    // * read data descriptor
811                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
812                    bos.write(buf.array(), 0, i);
813                    readDataDescriptor();
814                }
815            }
816        }
817        return done;
818    }
819
820    /**
821     * If the last read bytes could hold a data descriptor and an
822     * incomplete signature then save the last bytes to the front of
823     * the buffer and cache everything in front of the potential data
824     * descriptor into the given ByteArrayOutputStream.
825     *
826     * <p>Data descriptor plus incomplete signature (3 bytes in the
827     * worst case) can be 20 bytes max.</p>
828     */
829    private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
830        final int cacheable = offset + lastRead - expecteDDLen - 3;
831        if (cacheable > 0) {
832            bos.write(buf.array(), 0, cacheable);
833            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
834            offset = expecteDDLen + 3;
835        } else {
836            offset += lastRead;
837        }
838        return offset;
839    }
840
841    private void pushback(byte[] buf, int offset, int length) throws IOException {
842        ((PushbackInputStream) in).unread(buf, offset, length);
843        pushedBackBytes(length);
844    }
845
846    // End of Central Directory Record
847    //   end of central dir signature    WORD
848    //   number of this disk             SHORT
849    //   number of the disk with the
850    //   start of the central directory  SHORT
851    //   total number of entries in the
852    //   central directory on this disk  SHORT
853    //   total number of entries in
854    //   the central directory           SHORT
855    //   size of the central directory   WORD
856    //   offset of start of central
857    //   directory with respect to
858    //   the starting disk number        WORD
859    //   .ZIP file comment length        SHORT
860    //   .ZIP file comment               up to 64KB
861    //
862
863    /**
864     * Reads the stream until it find the "End of central directory
865     * record" and consumes it as well.
866     */
867    private void skipRemainderOfArchive() throws IOException {
868        // skip over central directory. One LFH has been read too much
869        // already.  The calculation discounts file names and extra
870        // data so it will be too short.
871        realSkip(entriesRead * CFH_LEN - LFH_LEN);
872        findEocdRecord();
873        realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
874        readFully(SHORT_BUF);
875        // file comment
876        realSkip(ZipShort.getValue(SHORT_BUF));
877    }
878
879    /**
880     * Reads forward until the signature of the &quot;End of central
881     * directory&quot; record is found.
882     */
883    private void findEocdRecord() throws IOException {
884        int currentByte = -1;
885        boolean skipReadCall = false;
886        while (skipReadCall || (currentByte = readOneByte()) > -1) {
887            skipReadCall = false;
888            if (!isFirstByteOfEocdSig(currentByte)) {
889                continue;
890            }
891            currentByte = readOneByte();
892            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
893                if (currentByte == -1) {
894                    break;
895                }
896                skipReadCall = isFirstByteOfEocdSig(currentByte);
897                continue;
898            }
899            currentByte = readOneByte();
900            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
901                if (currentByte == -1) {
902                    break;
903                }
904                skipReadCall = isFirstByteOfEocdSig(currentByte);
905                continue;
906            }
907            currentByte = readOneByte();
908            if (currentByte == -1
909                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
910                break;
911            }
912            skipReadCall = isFirstByteOfEocdSig(currentByte);
913        }
914    }
915
916    /**
917     * Skips bytes by reading from the underlying stream rather than
918     * the (potentially inflating) archive stream - which {@link
919     * #skip} would do.
920     *
921     * Also updates bytes-read counter.
922     */
923    private void realSkip(long value) throws IOException {
924        if (value >= 0) {
925            long skipped = 0;
926            while (skipped < value) {
927                long rem = value - skipped;
928                int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
929                if (x == -1) {
930                    return;
931                }
932                count(x);
933                skipped += x;
934            }
935            return;
936        }
937        throw new IllegalArgumentException();
938    }
939
940    /**
941     * Reads bytes by reading from the underlying stream rather than
942     * the (potentially inflating) archive stream - which {@link #read} would do.
943     *
944     * Also updates bytes-read counter.
945     */
946    private int readOneByte() throws IOException {
947        int b = in.read();
948        if (b != -1) {
949            count(1);
950        }
951        return b;
952    }
953
954    private boolean isFirstByteOfEocdSig(int b) {
955        return b == ZipArchiveOutputStream.EOCD_SIG[0];
956    }
957
958    /**
959     * Structure collecting information for the entry that is
960     * currently being read.
961     */
962    private static final class CurrentEntry {
963
964        /**
965         * Current ZIP entry.
966         */
967        private final ZipArchiveEntry entry = new ZipArchiveEntry();
968
969        /**
970         * Does the entry use a data descriptor?
971         */
972        private boolean hasDataDescriptor;
973
974        /**
975         * Does the entry have a ZIP64 extended information extra field.
976         */
977        private boolean usesZip64;
978
979        /**
980         * Number of bytes of entry content read by the client if the
981         * entry is STORED.
982         */
983        private long bytesRead;
984
985        /**
986         * Number of bytes of entry content read so from the stream.
987         *
988         * <p>This may be more than the actual entry's length as some
989         * stuff gets buffered up and needs to be pushed back when the
990         * end of the entry has been reached.</p>
991         */
992        private long bytesReadFromStream;
993
994        /**
995         * The checksum calculated as the current entry is read.
996         */
997        private final CRC32 crc = new CRC32();
998
999        /**
1000         * The input stream decompressing the data for shrunk and imploded entries.
1001         */
1002        private InputStream in;
1003    }
1004
1005    /**
1006     * Bounded input stream adapted from commons-io
1007     */
1008    private class BoundedInputStream extends InputStream {
1009
1010        /** the wrapped input stream */
1011        private final InputStream in;
1012
1013        /** the max length to provide */
1014        private final long max;
1015
1016        /** the number of bytes already returned */
1017        private long pos = 0;
1018    
1019        /**
1020         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1021         * stream and limits it to a certain size.
1022         *
1023         * @param in The wrapped input stream
1024         * @param size The maximum number of bytes to return
1025         */
1026        public BoundedInputStream(final InputStream in, final long size) {
1027            this.max = size;
1028            this.in = in;
1029        }
1030
1031        @Override
1032        public int read() throws IOException {
1033            if (max >= 0 && pos >= max) {
1034                return -1;
1035            }
1036            final int result = in.read();
1037            pos++;
1038            count(1);
1039            current.bytesReadFromStream++;
1040            return result;
1041        }
1042
1043        @Override
1044        public int read(final byte[] b) throws IOException {
1045            return this.read(b, 0, b.length);
1046        }
1047
1048        @Override
1049        public int read(final byte[] b, final int off, final int len) throws IOException {
1050            if (max >= 0 && pos >= max) {
1051                return -1;
1052            }
1053            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1054            final int bytesRead = in.read(b, off, (int) maxRead);
1055
1056            if (bytesRead == -1) {
1057                return -1;
1058            }
1059
1060            pos += bytesRead;
1061            count(bytesRead);
1062            current.bytesReadFromStream += bytesRead;
1063            return bytesRead;
1064        }
1065
1066        @Override
1067        public long skip(final long n) throws IOException {
1068            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1069            final long skippedBytes = in.skip(toSkip);
1070            pos += skippedBytes;
1071            return skippedBytes;
1072        }
1073    
1074        @Override
1075        public int available() throws IOException {
1076            if (max >= 0 && pos >= max) {
1077                return 0;
1078            }
1079            return in.available();
1080        }
1081    }
1082}