/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to be a hint on the encoding of string values
 * in any piece of documentation.  Given that the main purpose of
 * dump/restore is backing up a system, it seems very likely that the
 * format uses the current default encoding of the system.
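 *
 * A typical usage sketch (the input file name is illustrative):
 * <pre>
 * InputStream in = new FileInputStream("backup.dump");
 * DumpArchiveInputStream dump = new DumpArchiveInputStream(in);
 * DumpArchiveEntry entry;
 * while ((entry = dump.getNextDumpEntry()) != null) {
 *     if (!entry.isDirectory()) {
 *         byte[] data = new byte[(int) entry.getEntrySize()];
 *         // a real caller should loop until read() returns -1
 *         dump.read(data, 0, data.length);
 *     }
 * }
 * dump.close();
 * </pre>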
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for filenames and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file
     * names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
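     *
     * An explicit encoding can be supplied, e.g. (a sketch;
     * "ISO-8859-1" is an assumed label/file-name encoding):
     * <pre>
     * DumpArchiveInputStream dump =
     *     new DumpArchiveInputStream(is, "ISO-8859-1");
     * </pre>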
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(InputStream is, String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure parent directories are
        // released first.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        if (p.getOriginalName() == null || q.getOriginalName() == null) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     * @return the next entry
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                    DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

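            // Each record mirrors the BSD struct direct layout (as implied
            // by the reads below): a 4-byte inode number, a 2-byte record
            // length, a 1-byte file type, a 1-byte name length, then the
            // name bytes starting at offset 8.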
            for (int i = 0; i < datalen - 8 && i < size - 8;
                    i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                byte type = blockBuffer[i + 6];

                String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                if ((type == 4) && names.containsKey(ino)) {
                    System.out.println("we already have ino: " +
                                       names.get(ino));
                }
                */

                names.put(ino, d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(e.getIno());
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }
    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're still
        // missing an intermediate value; if so we add the entry to the
        // pending map and return null (see below).
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = len > readBuf.length - recordOffset
                ? readBuf.length - recordOffset : len;

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
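     *
     * For example (a sketch; the probe stream <code>in</code> is
     * illustrative):
     * <pre>
     * byte[] probe = new byte[1024];
     * int n = in.read(probe); // may read fewer bytes than requested
     * boolean isDump = DumpArchiveInputStream.matches(probe, n);
     * </pre>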
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }
}

