/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.z;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;

import org.apache.commons.compress.compressors.lzw.LZWInputStream;

/**
 * Input stream that decompresses .Z files.
 *
 * <p>The stream starts with a three byte header: the two magic bytes
 * {@code 0x1f 0x9d} followed by a flags byte whose top bit selects
 * "block mode" and whose low five bits give the maximum code width.</p>
 *
 * @NotThreadSafe
 * @since 1.7
 */
public class ZCompressorInputStream extends LZWInputStream {
    private static final int MAGIC_1 = 0x1f;
    private static final int MAGIC_2 = 0x9d;
    private static final int BLOCK_MODE_MASK = 0x80;
    private static final int MAX_CODE_SIZE_MASK = 0x1f;

    /**
     * Code width the decoder falls back to when it sees a clear code.
     * Within this class the width only ever grows from here, so a header
     * announcing a smaller maximum cannot describe a decodable stream.
     */
    private static final int MIN_CODE_SIZE = 9;

    /**
     * Largest maximum code width this class can work with: the table limit
     * is computed as {@code 1 << codeSize}, which overflows {@code int}
     * for a width of 31.
     */
    private static final int MAX_SUPPORTED_CODE_SIZE = 30;

    private final boolean blockMode;
    private final int maxCodeSize;
    private long totalCodesRead = 0;

    /**
     * Reads and validates the three byte .Z header and prepares the
     * decoding tables.
     *
     * @param inputStream the stream to read the compressed data from
     * @throws IOException if reading the header fails, the magic bytes do
     *         not match, the stream ends inside the header, or the header
     *         announces a maximum code width this class cannot decode
     */
    public ZCompressorInputStream(InputStream inputStream) throws IOException {
        super(inputStream, ByteOrder.LITTLE_ENDIAN);
        final int firstByte = (int) in.readBits(8);
        final int secondByte = (int) in.readBits(8);
        final int thirdByte = (int) in.readBits(8);
        // A negative third byte means the stream ended before the flags byte.
        if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
            throw new IOException("Input is not in .Z format");
        }

        // The flags byte comes from untrusted input: reject widths that can
        // never be decoded instead of letting them surface later as
        // unchecked array/size exceptions.
        final int headerMaxCodeSize = thirdByte & MAX_CODE_SIZE_MASK;
        if (headerMaxCodeSize < MIN_CODE_SIZE || headerMaxCodeSize > MAX_SUPPORTED_CODE_SIZE) {
            throw new IOException("Unsupported maximum code size " + headerMaxCodeSize
                    + " in .Z header");
        }

        blockMode = (thirdByte & BLOCK_MODE_MASK) != 0;
        maxCodeSize = headerMaxCodeSize;
        if (blockMode) {
            setClearCode(codeSize);
        }
        initializeTables(maxCodeSize);
        clearEntries();
    }

    /**
     * Resets the table size to the 256 single-byte entries, plus one
     * extra slot in block mode.
     */
    private void clearEntries() {
        tableSize = 1 << 8;
        if (blockMode) {
            // NOTE(review): the extra slot presumably stands for the clear code - confirm.
            tableSize++;
        }
    }

    /**
     * {@inheritDoc}
     * <p>Additionally counts every code that was read successfully
     * (non-negative) so that {@link #reAlignReading()} knows the position
     * inside the current group of eight codes.</p>
     * <p><strong>This method is only protected for technical reasons
     * and is not part of Commons Compress' published API.  It may
     * change or disappear without warning.</strong></p>
     */
    @Override
    protected int readNextCode() throws IOException {
        final int code = super.readNextCode();
        if (code >= 0) {
            ++totalCodesRead;
        }
        return code;
    }

    /**
     * Skips the padding codes that precede a change of code width and
     * clears the bit cache.
     *
     * @throws IOException if reading the padding codes fails
     */
    private void reAlignReading() throws IOException {
        // "compress" works in multiples of 8 symbols, each codeBits bits long.
        // When codeBits changes, the remaining unused symbols in the current
        // group of 8 are still written out, in the old codeSize,
        // as garbage values (usually zeroes) that need to be skipped.
        long codeReadsToThrowAway = 8 - (totalCodesRead % 8);
        if (codeReadsToThrowAway == 8) {
            // Already on a group boundary: nothing to skip.
            codeReadsToThrowAway = 0;
        }
        for (long i = 0; i < codeReadsToThrowAway; i++) {
            // Goes through the counting override, so totalCodesRead ends up
            // on a multiple of 8 when all padding codes are present.
            readNextCode();
        }
        in.clearBitCache();
    }

    /**
     * {@inheritDoc}
     * <p>Adds the entry with a table limit of {@code 1 << codeSize}. Once
     * the table has reached that limit and the code width is still below
     * the maximum announced in the header, the input is re-aligned and
     * the code width grows by one bit.</p>
     * <p><strong>This method is only protected for technical reasons
     * and is not part of Commons Compress' published API.  It may
     * change or disappear without warning.</strong></p>
     */
    @Override
    protected int addEntry(int previousCode, byte character) throws IOException {
        final int maxTableSize = 1 << codeSize;
        final int r = addEntry(previousCode, character, maxTableSize);
        if (tableSize == maxTableSize && codeSize < maxCodeSize) {
            // Padding is written in the old width, so skip it before widening.
            reAlignReading();
            codeSize++;
        }
        return r;
    }

    /**
     * {@inheritDoc}
     * <p>Returns {@code -1} when no further code can be read and
     * {@code 0} after handling a clear code in block mode; otherwise
     * returns the result of expanding the code to the output stack.</p>
     * <p><strong>This method is only protected for technical reasons
     * and is not part of Commons Compress' published API.  It may
     * change or disappear without warning.</strong></p>
     */
    @Override
    protected int decompressNextSymbol() throws IOException {
        //
        //                   table entry    table entry
        //                  _____________   _____
        //    table entry  /             \ /     \
        //    ____________/               \       \
        //   /           / \             / \       \
        //  +---+---+---+---+---+---+---+---+---+---+
        //  | . | . | . | . | . | . | . | . | . | . |
        //  +---+---+---+---+---+---+---+---+---+---+
        //  |<--------->|<------------->|<----->|<->|
        //     symbol        symbol      symbol  symbol
        //
        final int code = readNextCode();
        if (code < 0) {
            return -1;
        } else if (blockMode && code == clearCode) {
            // Clear code: drop all learned entries and restart at the
            // narrowest code width.
            clearEntries();
            reAlignReading();
            codeSize = MIN_CODE_SIZE;
            previousCode = -1;
            return 0;
        } else {
            boolean addedUnfinishedEntry = false;
            if (code == tableSize) {
                // The code refers to the entry that is just about to be created.
                addRepeatOfPreviousCode();
                addedUnfinishedEntry = true;
            } else if (code > tableSize) {
                throw new IOException(String.format("Invalid %d bit code 0x%x", codeSize, code));
            }
            return expandCodeToOutputStack(code, addedUnfinishedEntry);
        }
    }

    /**
     * Checks if the signature matches what is expected for a Unix compress file.
     *
     * <p>Only the two magic bytes are compared, but more than three
     * bytes must be available for the check to succeed.</p>
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is a Unix compress compressed
     * stream, false otherwise
     *
     * @since 1.9
     */
    public static boolean matches(byte[] signature, int length) {
        // NOTE(review): the header is three bytes long, so "> 3" also rejects a
        // header-only stream - confirm whether that is intended before relaxing it.
        return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2;
    }

}