using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
#if NET4
using System.Linq;
#else
using NewLife.Linq;
#endif
using BinaryReaderX = NewLife.Serialization.BinaryReaderX;
using BinaryWriterX = NewLife.Serialization.BinaryWriterX;
namespace NewLife.Compression
{
/// <summary>Zip文件</summary>
/// <remarks>
/// Zip定义位于 <a target="_blank" href="http://www.pkware.com/documents/casestudies/APPNOTE.TXT">http://www.pkware.com/documents/casestudies/APPNOTE.TXT</a>
///
/// 本程序只支持Zip基本功能,不支持加密和Zip64(用于超过2G的文件压缩)。
///
/// 基本常识:GZip/Deflate仅仅是数据压缩算法,只负责压缩一组数据;而Zip仅仅是一种打包用的文件格式,指示多个被压缩后的文件如何组合在一起形成一个压缩包,当然,这些被压缩的文件除了Deflate算法还可能有其它算法。
///
/// 核心原理:通过二进制序列化框架,实现Zip格式的解析,数据的压缩和解压缩由系统的DeflateStream完成!
///
/// 关于压缩算法:系统的DeflateStream实现了Deflate压缩算法,但是硬编码了四级压缩(共十级,第四级在快速压缩中压缩率最高)。相关硬编码位于内嵌的FastEncoderWindow类中。
///
/// 感谢@小董(1287263703)、@Johnses(285732917)的热心帮忙,发现了0字节文件压缩和解压缩的BUG!
/// </remarks>
/// <example>
/// 标准压缩:
/// <code>
/// using (ZipFile zf = new ZipFile())
/// {
/// zf.AddDirectory("TestZip");
///
/// using (var fs = File.Create("ab.zip"))
/// {
/// zf.Write(fs);
/// }
/// }
/// </code>
///
/// 标准解压缩:
/// <code>
/// using (ZipFile zf = new ZipFile(file))
/// {
/// zf.Extract("TestZip");
/// }
/// </code>
///
/// 快速压缩:
/// <code>
/// ZipFile.CompressFile("aa.doc");
/// ZipFile.CompressDirectory("TestZip");
/// </code>
///
/// 快速解压缩:
/// <code>
/// ZipFile.Extract("aa.zip", "Test");
/// </code>
/// </example>
public partial class ZipFile : DisposeBase, IEnumerable, IEnumerable<ZipEntry>
{
#region 属性
private String _Name;
/// <summary>名称</summary>
public String Name { get { return _Name; } set { _Name = value; } }
private String _Comment;
/// <summary>注释</summary>
public String Comment { get { return _Comment; } set { _Comment = value; } }
private Encoding _Encoding ;
/// <summary>字符串编码</summary>
public Encoding Encoding { get { return _Encoding ?? Encoding.Default; } set { _Encoding = value; } }
private Boolean _UseDirectory;
/// <summary>是否使用目录。不使用目录可以减少一点点文件大小,网络上的压缩包也这么做,但是Rar压缩的使用了目录</summary>
public Boolean UseDirectory { get { return _UseDirectory; } set { _UseDirectory = value; } }
#endregion
#region 构造
/// <summary>实例化一个Zip文件对象</summary>
public ZipFile() { }
/// <summary>实例化一个Zip文件对象</summary>
/// <param name="fileName"></param>
public ZipFile(String fileName) : this(fileName, null) { }
/// <summary>实例化一个Zip文件对象</summary>
/// <param name="fileName"></param>
/// <param name="encoding"></param>
public ZipFile(String fileName, Encoding encoding)
//: this(File.OpenRead(fileName), encoding)
{
//if (!String.IsNullOrEmpty(fileName)) DefaultExtractPath = Path.GetDirectoryName(fileName);
Name = fileName;
Encoding = encoding;
var fs = File.OpenRead(fileName);
try
{
Read(fs);
}
catch (Exception ex)
{
throw new ZipException("不是有效的Zip格式!", ex);
}
if (fs.Length < 10 * 1024 * 1024) fs.Dispose();
}
/// <summary>实例化一个Zip文件对象</summary>
/// <param name="stream"></param>
/// <param name="encoding"></param>
public ZipFile(Stream stream, Encoding encoding)
{
Encoding = encoding;
try
{
Read(stream);
}
catch (Exception ex)
{
throw new ZipException("不是有效的Zip格式!", ex);
}
}
/// <summary>释放资源</summary>
/// <param name="disposing"></param>
protected override void OnDispose(bool disposing)
{
base.OnDispose(disposing);
//if (_readStream != null) _readStream.Dispose();
if (Entries.Count > 0)
{
// 是否所有实体,因为里面可能含有数据流
foreach (var item in Entries.Values)
{
try
{
item.Dispose();
}
catch { }
}
Entries.Clear();
}
}
#endregion
#region 读取
/// <summary>从数据流中读取Zip格式数据</summary>
/// <param name="stream">数据流</param>
/// <param name="embedFileData">
/// 当前读取仅读取文件列表等信息,如果设置内嵌数据,则同时把文件数据读取到内存中;否则,在解压缩时需要再次使用数据流。
/// 如果外部未指定是否内嵌文件数据,则根据数据流是否小于10M来决定是否内嵌。
/// </param>
public void Read(Stream stream, Boolean? embedFileData = null)
{
// 如果外部未指定是否内嵌文件数据,则根据数据流是否小于10M来决定是否内嵌
Boolean embedfile = embedFileData ?? stream.Length < 10 * 1024 * 1024;
ZipEntry e;
bool firstEntry = true;
while ((e = ZipEntry.ReadEntry(this, stream, firstEntry, embedfile)) != null)
{
String name = e.FileName;
Int32 n = 2;
while (this[name] != null) { name = e.FileName + "" + n++; }
Entries.Add(name, e);
firstEntry = false;
if (!UseDirectory && e.IsDirectory) UseDirectory = true;
}
// 读取目录结构,但是可能有错误,需要屏蔽
try
{
var reader = CreateReader(stream);
// 根据签名寻找CentralDirectory,因为文件头数据之后可能有加密相关信息
//if (stream.IndexOf(BitConverter.GetBytes(ZipConstants.ZipDirEntrySignature)) >= 0)
{
ZipEntry de;
while ((de = ZipEntry.ReadDirEntry(this, stream)) != null)
{
e = Entries[de.FileName];
if (e != null)
{
//e.Comment = de.Comment;
//e.IsDirectory = de.IsDirectory;
e.CopyFromDirEntry(de);
}
}
// 这里应该是数字签名
if (reader.Expect(ZipConstants.DigitalSignature))
{
UInt16 n = reader.ReadUInt16();
if (n > 0) reader.ReadBytes(n);
}
}
// 读取目录结构尾记录
if (reader.Expect(ZipConstants.EndOfCentralDirectorySignature))
{
var ecd = reader.ReadObject<EndOfCentralDirectory>();
if (!String.IsNullOrEmpty(ecd.Comment)) Comment = ecd.Comment.TrimEnd('\0');
}
}
catch (ZipException) { }
catch (IOException) { }
}
#endregion
#region 写入
/// <summary>把Zip格式数据写入到数据流中</summary>
/// <param name="stream"></param>
public void Write(Stream stream)
{
if (stream == null) throw new ArgumentNullException("stream");
if (Entries.Count < 1) throw new ZipException("没有添加任何文件!");
var writer = CreateWriter(stream);
writer.Settings.IgnoreMembers = null;
// 写入文件头时忽略掉这些字段,这些都是DirEntry的字段
writer.Settings.IgnoreMembers = ZipEntry.dirMembers;
foreach (var item in Entries.Values)
{
if (UseDirectory || !item.IsDirectory) item.Write(writer);
}
var ecd = new EndOfCentralDirectory();
ecd.Offset = (UInt32)writer.Stream.Position;
writer.Settings.IgnoreMembers = null;
Int32 num = 0;
foreach (var item in Entries.Values)
{
// 每一个都需要写目录项
if (UseDirectory || !item.IsDirectory)
{
item.WriteDir(writer);
num++;
}
}
ecd.Comment = Comment;
// 加上\0结尾,否则会有一点乱码
if (!String.IsNullOrEmpty(ecd.Comment) && !ecd.Comment.EndsWith("\0")) ecd.Comment += "\0";
ecd.NumberOfEntries = (UInt16)num;
ecd.NumberOfEntriesOnThisDisk = (UInt16)num;
ecd.Size = (UInt32)writer.Stream.Position - ecd.Offset;
writer.WriteObject(ecd);
writer.Flush();
}
/// <summary>把Zip格式数据写入到文件中</summary>
/// <param name="fileName"></param>
public void Write(String fileName)
{
if (String.IsNullOrEmpty(fileName)) throw new ArgumentNullException("fileName");
using (var fs = File.Create(fileName))
{
Write(fs);
}
}
#endregion
#region 解压缩
/// <summary>解压缩</summary>
/// <param name="outputPath">目标路径</param>
/// <param name="overrideExisting">是否覆盖已有文件</param>
public void Extract(String outputPath, Boolean overrideExisting = true)
{
if (String.IsNullOrEmpty(outputPath)) throw new ArgumentNullException("outputPath");
foreach (var item in Entries.Values)
{
item.Extract(outputPath, overrideExisting);
}
}
/// <summary>快速解压缩</summary>
/// <param name="fileName"></param>
/// <param name="outputPath"></param>
/// <param name="overrideExisting"></param>
public static void Extract(String fileName, String outputPath, Boolean overrideExisting = true)
{
if (String.IsNullOrEmpty(fileName)) throw new ArgumentNullException("fileName");
// 默认使用没有后缀的路径作为目录
if (String.IsNullOrEmpty(outputPath)) outputPath = Path.GetFileNameWithoutExtension(fileName);
if (String.IsNullOrEmpty(outputPath)) throw new ArgumentNullException("outputPath");
using (ZipFile zf = new ZipFile(fileName))
{
zf.Extract(outputPath, overrideExisting);
}
}
#endregion
#region 压缩
/// <summary>添加文件。
/// 必须指定文件路径<paramref name="fileName"/>,如果不指定实体名<paramref name="entryName"/>,则使用文件名,并加到顶级目录。</summary>
/// <param name="fileName">文件路径</param>
/// <param name="entryName">实体名</param>
/// <param name="stored">是否仅存储,不压缩</param>
/// <returns></returns>
public ZipEntry AddFile(String fileName, String entryName = null, Boolean? stored = false)
{
if (String.IsNullOrEmpty(fileName)) throw new ArgumentNullException("fileName");
if (String.IsNullOrEmpty(entryName)) entryName = Path.GetFileName(fileName);
entryName = entryName.Replace(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
// 判断并添加目录
String dir = Path.GetDirectoryName(entryName);
if (!String.IsNullOrEmpty(dir))
{
if (!dir.EndsWith(DirSeparator)) dir += DirSeparator;
if (this[dir] == null)
{
var zde = new ZipEntry();
zde.FileName = dir;
Entries.Add(dir, zde);
}
}
var entry = ZipEntry.Create(fileName, entryName, stored);
Entries.Add(entry.FileName, entry);
return entry;
}
/// <summary>添加目录。
/// 必须指定目录<paramref name="dirName"/>,如果不指定实体名<paramref name="entryName"/>,则加到顶级目录。</summary>
/// <param name="dirName">目录</param>
/// <param name="entryName">实体名</param>
/// <param name="stored">是否仅存储,不压缩</param>
public void AddDirectory(String dirName, String entryName = null, Boolean? stored = null)
{
if (String.IsNullOrEmpty(dirName)) throw new ArgumentNullException("fileName");
dirName = Path.GetFullPath(dirName);
if (!String.IsNullOrEmpty(entryName))
{
var entry = ZipEntry.Create(null, entryName, true);
Entries.Add(entry.FileName, entry);
if (!entryName.EndsWith(DirSeparator)) entryName += DirSeparator;
}
// 所有文件
foreach (var item in Directory.GetFiles(dirName, "*.*", SearchOption.TopDirectoryOnly))
{
String name = item;
if (name.StartsWith(dirName)) name = name.Substring(dirName.Length);
if (name[0] == Path.DirectorySeparatorChar) name = name.Substring(1);
if (!String.IsNullOrEmpty(entryName)) name = entryName + name;
AddFile(item, name, stored);
}
foreach (var item in Directory.GetDirectories(dirName, "*", SearchOption.TopDirectoryOnly))
{
String name = item;
if (name.StartsWith(dirName)) name = name.Substring(dirName.Length);
if (name[0] == Path.DirectorySeparatorChar) name = name.Substring(1);
// 加上分隔符,表示目录
if (!name.EndsWith(DirSeparator)) name += DirSeparator;
if (!String.IsNullOrEmpty(entryName)) name = entryName + name;
AddDirectory(item, name, stored);
}
}
/// <summary>快速压缩文件。</summary>
/// <param name="fileName"></param>
/// <param name="outputName"></param>
public static void CompressFile(String fileName, String outputName = null)
{
if (String.IsNullOrEmpty(fileName)) throw new ArgumentNullException("fileName");
if (String.IsNullOrEmpty(outputName)) outputName = Path.ChangeExtension(Path.GetFileName(fileName), ".zip");
using (ZipFile zf = new ZipFile())
{
zf.AddFile(fileName);
zf.Write(outputName);
}
}
/// <summary>快速压缩目录。</summary>
/// <param name="dirName"></param>
/// <param name="outputName"></param>
public static void CompressDirectory(String dirName, String outputName = null)
{
if (String.IsNullOrEmpty(dirName)) throw new ArgumentNullException("dirName");
if (String.IsNullOrEmpty(outputName)) outputName = Path.ChangeExtension(Path.GetFileName(dirName), ".zip");
using (ZipFile zf = new ZipFile())
{
zf.AddDirectory(dirName);
zf.Write(outputName);
}
}
#endregion
#region 索引集合
private Dictionary<String, ZipEntry> _Entries;
/// <summary>文件实体集合</summary>
public Dictionary<String, ZipEntry> Entries
{
get
{
// 不区分大小写
if (_Entries == null) _Entries = new Dictionary<string, ZipEntry>(StringComparer.OrdinalIgnoreCase);
return _Entries;
}
}
/// <summary>返回指定索引处的实体</summary>
/// <param name="index"></param>
/// <returns></returns>
public ZipEntry this[Int32 index] { get { return Entries.Values.ElementAtOrDefault(index); } }
/// <summary>返回指定名称的实体</summary>
/// <param name="fileName"></param>
/// <returns></returns>
public ZipEntry this[String fileName]
{
get
{
var key = fileName;
key = key.Replace('\\', '/');
key = key.TrimStart('/');
var entries = Entries;
ZipEntry e = null;
if (entries.TryGetValue(key, out e)) return e;
key = key.Replace("/", "\\");
if (entries.TryGetValue(key, out e)) return e;
return null;
}
}
/// <summary>实体个数</summary>
public Int32 Count { get { return Entries.Count; } }
#endregion
#region 辅助
internal BinaryReaderX CreateReader(Stream stream)
{
var reader = new BinaryReaderX() { Stream = stream };
reader.Settings.EncodeInt = false;
reader.Settings.UseObjRef = false;
reader.Settings.SizeFormat = TypeCode.Int16;
reader.Settings.Encoding = Encoding;
//#if DEBUG
// reader.Debug = true;
// reader.EnableTraceStream();
//#endif
return reader;
}
internal BinaryWriterX CreateWriter(Stream stream)
{
var writer = new BinaryWriterX() { Stream = stream };
writer.Settings.EncodeInt = false;
writer.Settings.UseObjRef = false;
writer.Settings.SizeFormat = TypeCode.Int16;
writer.Settings.Encoding = Encoding;
//#if DEBUG
// writer.Debug = true;
// writer.EnableTraceStream();
//#endif
return writer;
}
internal static readonly DateTime MinDateTime = new DateTime(1980, 1, 1);
internal static DateTime DosDateTimeToFileTime(Int32 value)
{
if (value <= 0) return MinDateTime;
Int16 time = (Int16)(value & 0x0000FFFF);
Int16 date = (Int16)((value & 0xFFFF0000) >> 16);
int year = 1980 + ((date & 0xFE00) >> 9);
int month = (date & 0x01E0) >> 5;
int day = date & 0x001F;
int hour = (time & 0xF800) >> 11;
int minute = (time & 0x07E0) >> 5;
int second = (time & 0x001F) * 2;
return new DateTime(year, month, day, hour, minute, second);
}
internal static Int32 FileTimeToDosDateTime(DateTime value)
{
if (value <= MinDateTime) value = MinDateTime;
Int32 date = (value.Year - 1980) << 9 | value.Month << 5 | value.Day;
Int32 time = value.Hour << 11 | value.Minute << 5 | value.Second / 2;
return date << 16 | time;
}
internal readonly static String DirSeparator = Path.AltDirectorySeparatorChar.ToString();
/// <summary>已重载。</summary>
/// <returns></returns>
public override string ToString() { return String.Format("{0} [{1}]", Name, Entries.Count); }
#endregion
#region IEnumerable<ZipEntry> 成员
IEnumerator<ZipEntry> IEnumerable<ZipEntry>.GetEnumerator() { return Entries.Values.GetEnumerator(); }
IEnumerator IEnumerable.GetEnumerator() { return Entries.Values.GetEnumerator(); }
#endregion
#region CentralDirectory
class EndOfCentralDirectory
{
#region 属性
private UInt32 _Signature = ZipConstants.EndOfCentralDirectorySignature;
/// <summary>签名。end of central dir signature</summary>
public UInt32 Signature { get { return _Signature; } set { _Signature = value; } }
private UInt16 _DiskNumber;
/// <summary>卷号。number of this disk</summary>
public UInt16 DiskNumber { get { return _DiskNumber; } set { _DiskNumber = value; } }
private UInt16 _DiskNumberWithStart;
/// <summary>number of the disk with the start of the central directory</summary>
public UInt16 DiskNumberWithStart { get { return _DiskNumberWithStart; } set { _DiskNumberWithStart = value; } }
private UInt16 _NumberOfEntriesOnThisDisk;
/// <summary>total number of entries in the central directory on this disk</summary>
public UInt16 NumberOfEntriesOnThisDisk { get { return _NumberOfEntriesOnThisDisk; } set { _NumberOfEntriesOnThisDisk = value; } }
private UInt16 _NumberOfEntries;
/// <summary>total number of entries in the central directory</summary>
public UInt16 NumberOfEntries { get { return _NumberOfEntries; } set { _NumberOfEntries = value; } }
private UInt32 _Size;
/// <summary>size of the central directory</summary>
public UInt32 Size { get { return _Size; } set { _Size = value; } }
private UInt32 _Offset;
/// <summary>offset of start of central directory with respect to the starting disk number</summary>
public UInt32 Offset { get { return _Offset; } set { _Offset = value; } }
private String _Comment;
/// <summary>注释</summary>
public String Comment { get { return _Comment; } set { _Comment = value; } }
#endregion
}
#endregion
}
}
// ==================================================================
//
// Information on the ZIP format:
//
// From
// http://www.pkware.com/documents/casestudies/APPNOTE.TXT
//
// Overall .ZIP file format:
//
// [local file header 1]
// [file data 1]
// [data descriptor 1] ** sometimes
// .
// .
// .
// [local file header n]
// [file data n]
// [data descriptor n] ** sometimes
// [archive decryption header]
// [archive extra data record]
// [central directory]
// [zip64 end of central directory record]
// [zip64 end of central directory locator]
// [end of central directory record]
//
// Local File Header format:
// local file header signature ... 4 bytes (0x04034b50)
// version needed to extract ..... 2 bytes
// general purpose bit field ..... 2 bytes
// compression method ............ 2 bytes
// last mod file time ............ 2 bytes
// last mod file date............. 2 bytes
// crc-32 ........................ 4 bytes
// compressed size................ 4 bytes
// uncompressed size.............. 4 bytes
// file name length............... 2 bytes
// extra field length ............ 2 bytes
// file name varies
// extra field varies
//
//
// Data descriptor: (used only when bit 3 of the general purpose bitfield is set)
// (although, I have found zip files where bit 3 is not set, yet this descriptor is present!)
// local file header signature 4 bytes (0x08074b50) ** sometimes!!! Not always
// crc-32 4 bytes
// compressed size 4 bytes
// uncompressed size 4 bytes
//
//
// Central directory structure:
//
// [file header 1]
// .
// .
// .
// [file header n]
// [digital signature]
//
//
// File header: (This is a ZipDirEntry)
// central file header signature 4 bytes (0x02014b50)
// version made by 2 bytes
// version needed to extract 2 bytes
// general purpose bit flag 2 bytes
// compression method 2 bytes
// last mod file time 2 bytes
// last mod file date 2 bytes
// crc-32 4 bytes
// compressed size 4 bytes
// uncompressed size 4 bytes
// file name length 2 bytes
// extra field length 2 bytes
// file comment length 2 bytes
// disk number start 2 bytes
// internal file attributes ** 2 bytes
// external file attributes *** 4 bytes
// relative offset of local header 4 bytes
// file name (variable size)
// extra field (variable size)
// file comment (variable size)
//
// ** The internal file attributes, near as I can tell,
// uses 0x01 for a file and a 0x00 for a directory.
//
// ***The external file attributes follows the MS-DOS file attribute byte, described here:
// at http://support.microsoft.com/kb/q125019/
// 0x0010 => directory
// 0x0020 => file
//
//
// End of central directory record:
//
// end of central dir signature 4 bytes (0x06054b50)
// number of this disk 2 bytes
// number of the disk with the
// start of the central directory 2 bytes
// total number of entries in the
// central directory on this disk 2 bytes
// total number of entries in
// the central directory 2 bytes
// size of the central directory 4 bytes
// offset of start of central
// directory with respect to
// the starting disk number 4 bytes
// .ZIP file comment length 2 bytes
// .ZIP file comment (variable size)
//
// date and time are packed values, as MSDOS did them
// time: bits 0-4 : seconds (divided by 2)
// 5-10: minute
// 11-15: hour
// date bits 0-4 : day
// 5-8: month
// 9-15 year (since 1980)
//
// see http://msdn.microsoft.com/en-us/library/ms724274(VS.85).aspx
|