Lean  $LEAN_TAG$
Compression.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Collections.Generic;
18 using System.Diagnostics;
19 using System.IO;
20 using System.IO.Compression;
21 using System.Linq;
22 using System.Text;
23 using System.Threading.Tasks;
24 using ICSharpCode.SharpZipLib.Core;
25 using ICSharpCode.SharpZipLib.GZip;
26 using ICSharpCode.SharpZipLib.Tar;
27 using Ionic.Zip;
28 using QuantConnect.Logging;
29 using ZipEntry = ICSharpCode.SharpZipLib.Zip.ZipEntry;
30 using ZipFile = Ionic.Zip.ZipFile;
31 using ZipInputStream = ICSharpCode.SharpZipLib.Zip.ZipInputStream;
32 using ZipOutputStream = ICSharpCode.SharpZipLib.Zip.ZipOutputStream;
33 
34 namespace QuantConnect
35 {
36  /// <summary>
37  /// Compression class manages the opening and extraction of compressed files (zip, tar, tar.gz).
38  /// </summary>
39  /// <remarks>QuantConnect's data library is stored in zip format locally on the hard drive.</remarks>
40  public static class Compression
41  {
/// <summary>
/// Global Flag :: Operating System.
/// True when the numeric platform identifier reported by
/// <see cref="Environment.OSVersion"/> is 4 (PlatformID.Unix), 6 (PlatformID.MacOSX) or 128.
/// </summary>
/// <remarks>NOTE(review): 128 is presumably the legacy Mono identifier for Unix - confirm.</remarks>
private static bool IsLinux
{
    get
    {
        switch ((int)Environment.OSVersion.Platform)
        {
            case 4:
            case 6:
            case 128:
                return true;
            default:
                return false;
        }
    }
}
53 
/// <summary>
/// Writes a new zip file containing one entry per dictionary item, using the key as
/// the entry name and the value (encoded with <see cref="Encoding.Default"/>) as the content.
/// </summary>
/// <param name="zipPath">Path of the zip file to create.</param>
/// <param name="filenamesAndData">Entry names mapped to their text content.</param>
/// <returns>True when the zip was written; false when any exception was logged.</returns>
public static bool ZipData(string zipPath, Dictionary<string, string> filenamesAndData)
{
    try
    {
        using (var zipOutput = new ZipOutputStream(File.Create(zipPath)))
        {
            // Compression level 0 is requested for every entry
            zipOutput.SetLevel(0);

            foreach (var pair in filenamesAndData)
            {
                var zipEntry = new ZipEntry(pair.Key);
                var payload = Encoding.Default.GetBytes(pair.Value);

                zipOutput.PutNextEntry(zipEntry);
                zipOutput.Write(payload, 0, payload.Length);
                zipOutput.CloseEntry();
            }

            // Finalize the archive before the using block disposes it
            zipOutput.Finish();
            zipOutput.Close();
        }

        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
91 
/// <summary>
/// Writes a new zip file containing one entry per key/value pair, using the key as the
/// entry name and the byte array as the entry content.
/// </summary>
/// <param name="zipPath">Path of the zip file to create.</param>
/// <param name="filenamesAndData">Entry names paired with their raw content.</param>
/// <returns>True when the zip was written; false when any exception was logged.</returns>
public static bool ZipData(string zipPath, IEnumerable<KeyValuePair<string, byte[]>> filenamesAndData)
{
    // Scratch buffer shared by all entries
    var chunk = new byte[4096];

    try
    {
        using (var zipOutput = new ZipOutputStream(File.Create(zipPath)))
        {
            foreach (var item in filenamesAndData)
            {
                zipOutput.PutNextEntry(new ZipEntry(item.Key));

                using (var source = new MemoryStream(item.Value))
                {
                    // Copy chunk by chunk; the final iteration writes zero bytes and stops
                    while (true)
                    {
                        var read = source.Read(chunk, 0, chunk.Length);
                        zipOutput.Write(chunk, 0, read);
                        if (read <= 0)
                        {
                            break;
                        }
                    }
                }
            }

            zipOutput.Finish();
            zipOutput.Close();
        }

        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
139 
/// <summary>
/// Creates a zip file with a single entry and writes each supplied line of text into it.
/// </summary>
/// <param name="zipPath">Path of the zip file to create.</param>
/// <param name="zipEntry">Name of the single entry inside the zip.</param>
/// <param name="lines">Lines written to the entry, one <c>WriteLine</c> call each.</param>
/// <returns>True when the zip was written; false when any exception was logged.</returns>
public static bool ZipData(string zipPath, string zipEntry, IEnumerable<string> lines)
{
    try
    {
        // Disposal runs in reverse order: the writer flushes into the zip stream first
        using var zipOutput = new ZipOutputStream(File.Create(zipPath));
        using var lineWriter = new StreamWriter(zipOutput);

        zipOutput.PutNextEntry(new ZipEntry(zipEntry));
        foreach (var text in lines)
        {
            lineWriter.WriteLine(text);
        }

        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
169 
/// <summary>
/// Adds a text entry to the zip file at <paramref name="path"/>, creating the zip
/// when it does not exist yet.
/// </summary>
/// <param name="path">The zip file path</param>
/// <param name="entry">The entry name</param>
/// <param name="data">The entry data</param>
/// <param name="overrideEntry">True to replace the entry when it already exists</param>
/// <returns>True on success; false when an exception was logged (for example when the
/// entry already exists and <paramref name="overrideEntry"/> is false)</returns>
public static bool ZipCreateAppendData(string path, string entry, string data, bool overrideEntry = false)
{
    try
    {
        using (var zip = File.Exists(path) ? ZipFile.Read(path) : new ZipFile(path))
        {
            // Only remove the current entry when the caller asked for a replacement
            if (overrideEntry && zip.ContainsEntry(entry))
            {
                zip.RemoveEntry(entry);
            }

            zip.AddEntry(entry, data);
            zip.UseZip64WhenSaving = Zip64Option.Always;
            zip.Save();
        }
    }
    catch (Exception err)
    {
        // Include the file and entry so the failure can be traced, as the byte[] overload does
        Log.Error(err, $"file: {path} entry: {entry}");
        return false;
    }
    return true;
}
201 
/// <summary>
/// Adds a binary entry to the zip file at <paramref name="path"/>, creating the zip
/// when it does not exist yet.
/// </summary>
/// <param name="path">The zip file path</param>
/// <param name="entry">The entry name</param>
/// <param name="data">The entry data</param>
/// <param name="overrideEntry">True to replace the entry when it already exists</param>
/// <returns>True on success; false when an exception was logged</returns>
public static bool ZipCreateAppendData(string path, string entry, byte[] data, bool overrideEntry = false)
{
    try
    {
        using var archive = File.Exists(path) ? ZipFile.Read(path) : new ZipFile(path);

        var replaceExisting = overrideEntry && archive.ContainsEntry(entry);
        if (replaceExisting)
        {
            archive.RemoveEntry(entry);
        }

        archive.AddEntry(entry, data);
        archive.UseZip64WhenSaving = Zip64Option.Always;
        archive.Save();
        return true;
    }
    catch (Exception err)
    {
        Log.Error(err, $"file: {path} entry: {entry}");
        return false;
    }
}
233 
/// <summary>
/// Synchronously unzips an in-memory zip into a dictionary of entry name to entry text
/// by blocking on <see cref="UnzipDataAsync"/>.
/// </summary>
/// <param name="zipData">Byte array holding the zip archive</param>
/// <param name="encoding">Encoding used to decode each entry. If not specified, defaults to ASCII</param>
/// <returns>Dictionary of entry name to decoded entry content</returns>
public static Dictionary<string, string> UnzipData(byte[] zipData, Encoding encoding = null)
{
    using (var zipBytes = new MemoryStream(zipData))
    {
        var pending = UnzipDataAsync(zipBytes, encoding);
        return pending.ConfigureAwait(false).GetAwaiter().GetResult();
    }
}
245 
/// <summary>
/// Uncompress a zip stream into a dictionary of entry name to entry text.
/// </summary>
/// <param name="stream">Stream data of zip compressed information</param>
/// <param name="encoding">Specifies the encoding used to read the bytes. If not specified, defaults to ASCII</param>
/// <returns>Dictionary of entry name to decoded content. When an exception occurs it is
/// logged and the entries decoded so far are returned</returns>
public static async Task<Dictionary<string, string>> UnzipDataAsync(Stream stream, Encoding encoding = null)
{
    var data = new Dictionary<string, string>();
    var textEncoding = encoding ?? Encoding.ASCII;

    try
    {
        using (var zipStream = new ZipInputStream(stream))
        {
            ZipEntry entry;
            while ((entry = zipStream.GetNextEntry()) != null)
            {
                // Drain the entry completely: a single read is not guaranteed to return
                // every byte, and the declared entry size may be unknown.
                using (var entryBytes = new MemoryStream())
                {
                    await zipStream.CopyToAsync(entryBytes).ConfigureAwait(false);
                    data[entry.Name] = textEncoding.GetString(entryBytes.ToArray());
                }
            }
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
    return data;
}
291 
/// <summary>
/// Builds an in-memory zip archive holding a single entry with the supplied content.
/// </summary>
/// <param name="bytes">The content to store in the entry</param>
/// <param name="zipEntryName">The name of the entry</param>
/// <returns>The bytes of the resulting zip archive</returns>
public static byte[] ZipBytes(byte[] bytes, string zipEntryName)
{
    using var zipBuffer = new MemoryStream();

    // leaveOpen keeps the memory stream usable once the archive is disposed
    using (var zip = new ZipArchive(zipBuffer, ZipArchiveMode.Create, leaveOpen: true))
    using (var target = zip.CreateEntry(zipEntryName).Open())
    {
        target.Write(bytes, 0, bytes.Length);
    }

    // The archive only finishes writing when disposed, so read the buffer afterwards
    return zipBuffer.ToArray();
}
314 
/// <summary>
/// Decompresses a .gz file into the target directory. The output file is named after the
/// input file with its last extension removed.
/// </summary>
/// <param name="gzipFileName">Path of the gzip file to read</param>
/// <param name="targetDirectory">Directory the decompressed file is written to</param>
/// <returns>Path of the decompressed file</returns>
public static string UnGZip(string gzipFileName, string targetDirectory)
{
    var outputName = Path.GetFileNameWithoutExtension(gzipFileName);
    var outputPath = Path.Combine(targetDirectory, outputName);

    // 4K copy buffer
    var copyBuffer = new byte[4096];

    using var compressed = new FileStream(gzipFileName, FileMode.Open, FileAccess.Read);
    using var inflater = new GZipInputStream(compressed);
    using var target = File.Create(outputPath);
    StreamUtils.Copy(inflater, target, copyBuffer);

    return outputPath;
}
333 
/// <summary>
/// Compress a given file and optionally delete the original. The zip is written next to
/// the source file, with the source file's extension replaced by ".zip".
/// </summary>
/// <param name="textPath">Path of the original file</param>
/// <param name="zipEntryName">The name of the entry inside the zip file</param>
/// <param name="deleteOriginal">Boolean flag to delete the original file after completion</param>
/// <returns>String path for the new zip file</returns>
public static string Zip(string textPath, string zipEntryName, bool deleteOriginal = true)
{
    // Only swap the trailing extension. A blanket string replace also rewrites matching
    // directory names, and for other extensions it returned the source path itself, so
    // the source was truncated and then deleted.
    var zipPath = Path.ChangeExtension(textPath, ".zip");
    if (string.Equals(zipPath, textPath, StringComparison.OrdinalIgnoreCase))
    {
        // The source already ends in ".zip"; never write the archive over its own input
        zipPath = textPath + ".zip";
    }

    Zip(textPath, zipPath, zipEntryName, deleteOriginal);
    return zipPath;
}
347 
/// <summary>
/// Writes the source file into a new zip at the destination as a single entry.
/// Any exception is logged and swallowed.
/// </summary>
/// <param name="source">The source file to be compressed</param>
/// <param name="destination">The destination zip file path</param>
/// <param name="zipEntryName">The zip entry name for the file</param>
/// <param name="deleteOriginal">True to delete the source file once the zip has been written</param>
public static void Zip(string source, string destination, string zipEntryName, bool deleteOriginal)
{
    try
    {
        var chunk = new byte[4096];

        using (var zipOutput = new ZipOutputStream(File.Create(destination)))
        {
            zipOutput.PutNextEntry(new ZipEntry(zipEntryName));

            using (var input = File.OpenRead(source))
            {
                // Copy chunk by chunk; the final iteration writes zero bytes and stops
                while (true)
                {
                    var read = input.Read(chunk, 0, chunk.Length);
                    zipOutput.Write(chunk, 0, read);
                    if (read <= 0)
                    {
                        break;
                    }
                }
            }
        }

        // Reached only when the zip was written without an exception
        if (deleteOriginal)
        {
            File.Delete(source);
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
}
389 
/// <summary>
/// Compresses a file using its own file name as the zip entry name.
/// </summary>
/// <param name="textPath">Path of the original file</param>
/// <param name="deleteOriginal">Boolean flag to delete the original file after completion</param>
/// <returns>String path for the new zip file</returns>
public static string Zip(string textPath, bool deleteOriginal = true)
    => Zip(textPath, Path.GetFileName(textPath), deleteOriginal);
400 
/// <summary>
/// Writes the given text (encoded with <see cref="Encoding.Default"/>) into a new zip
/// file as a single entry.
/// </summary>
/// <param name="data">Data to write to zip</param>
/// <param name="zipPath">Path to write to</param>
/// <param name="zipEntry">Entry to save the data as</param>
public static void Zip(string data, string zipPath, string zipEntry)
{
    using var zipOutput = new ZipOutputStream(File.Create(zipPath));
    zipOutput.PutNextEntry(new ZipEntry(zipEntry));

    var chunk = new byte[4096];
    using var source = new MemoryStream(Encoding.Default.GetBytes(data));

    // Copy chunk by chunk; the final iteration writes zero bytes and stops
    while (true)
    {
        var read = source.Read(chunk, 0, chunk.Length);
        zipOutput.Write(chunk, 0, read);
        if (read <= 0)
        {
            break;
        }
    }
}
426 
/// <summary>
/// Zips a directory tree into the destination file, replacing the destination when it
/// already exists. Entry names are encoded through <c>PathEncoder</c>.
/// </summary>
/// <param name="directory">The directory to be zipped</param>
/// <param name="destination">The output zip file destination</param>
/// <param name="includeRootInZip">True to include the root 'directory' in the zip, false otherwise</param>
/// <returns>True on a successful zip, false when an exception was logged</returns>
public static bool ZipDirectory(string directory, string destination, bool includeRootInZip = true)
{
    var succeeded = false;
    try
    {
        if (File.Exists(destination))
        {
            File.Delete(destination);
        }

        System.IO.Compression.ZipFile.CreateFromDirectory(
            directory,
            destination,
            CompressionLevel.Fastest,
            includeBaseDirectory: includeRootInZip,
            entryNameEncoding: new PathEncoder());
        succeeded = true;
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
    return succeeded;
}
448 
/// <summary>
/// UTF-8 encoding that converts backslashes to forward slashes before encoding, so that
/// encoded paths use the linux separator for cross platform compatibility.
/// </summary>
private class PathEncoder : UTF8Encoding
{
    /// <summary>
    /// Encodes the string after replacing every backslash with a forward slash.
    /// </summary>
    public override byte[] GetBytes(string s) => base.GetBytes(s.Replace('\\', '/'));
}
460 
/// <summary>
/// Unzips the specified zip file to the specified directory
/// </summary>
/// <param name="zip">The zip to be unzipped</param>
/// <param name="directory">The directory to place the unzipped files</param>
/// <param name="overwrite">Flag specifying whether or not to overwrite existing files</param>
/// <returns>True when every entry was extracted; false when the zip does not exist or an
/// exception was logged (including an entry that would land outside the directory)</returns>
public static bool Unzip(string zip, string directory, bool overwrite = false)
{
    if (!File.Exists(zip))
    {
        return false;
    }

    try
    {
        if (!overwrite)
        {
            System.IO.Compression.ZipFile.ExtractToDirectory(zip, directory);
            return true;
        }

        // Absolute destination root with a trailing separator, used to reject entries
        // whose name (for example "../x") would escape the destination directory
        var root = Path.GetFullPath(string.IsNullOrEmpty(directory) ? "." : directory);
        var separator = Path.DirectorySeparatorChar.ToString();
        if (!root.EndsWith(separator, StringComparison.Ordinal))
        {
            root += separator;
        }

        using (var archive = new ZipArchive(File.OpenRead(zip)))
        {
            foreach (var file in archive.Entries)
            {
                // skip directories
                if (string.IsNullOrEmpty(file.Name))
                {
                    continue;
                }

                var filepath = Path.Combine(directory, file.FullName);
                if (IsLinux)
                {
                    filepath = filepath.Replace(@"\", "/");
                }

                var outputFile = new FileInfo(filepath);
                if (!outputFile.FullName.StartsWith(root, StringComparison.Ordinal))
                {
                    throw new IOException(
                        $"Compression.Unzip(): Entry '{file.FullName}' would be extracted outside of '{directory}'");
                }

                if (!outputFile.Directory.Exists)
                {
                    outputFile.Directory.Create();
                }
                file.ExtractToFile(outputFile.FullName, true);
            }
        }

        return true;
    }
    catch (Exception err)
    {
        Log.Error(err);
        return false;
    }
}
505 
/// <summary>
/// Creates a new zip at the destination containing every listed file that exists, each
/// stored under its file name only. Missing files are traced and skipped; any exception
/// is logged and swallowed.
/// </summary>
/// <param name="destination">Path of the zip file to create</param>
/// <param name="files">Paths of the files to add</param>
public static void ZipFiles(string destination, IEnumerable<string> files)
{
    try
    {
        using var zipOutput = new ZipOutputStream(File.Create(destination));
        var copyBuffer = new byte[4096];

        foreach (var path in files)
        {
            if (File.Exists(path))
            {
                zipOutput.PutNextEntry(new ZipEntry(Path.GetFileName(path)));
                using (var input = File.OpenRead(path))
                {
                    StreamUtils.Copy(input, zipOutput, copyBuffer);
                }
            }
            else
            {
                Log.Trace($"ZipFiles(): File does not exist: {path}");
            }
        }
    }
    catch (Exception err)
    {
        Log.Error(err);
    }
}
538 
/// <summary>
/// Opens a reader over the first entry of a local zip file.
/// Important: the caller must call Dispose() on the returned ZipFile instance.
/// </summary>
/// <param name="filename">Location of the original zip file</param>
/// <param name="zip">The ZipFile instance to be returned to the caller</param>
/// <returns>Stream reader of the first file contents in the zip file</returns>
public static StreamReader Unzip(string filename, out ZipFile zip)
    => Unzip(filename, null, out zip);
550 
/// <summary>
/// Opens a reader over one entry of a local zip file.
/// Important: the caller must call Dispose() on the returned ZipFile instance.
/// </summary>
/// <param name="filename">Location of the original zip file</param>
/// <param name="zipEntryName">The zip entry name to open a reader for, compared ignoring case. Specify null to access the first entry</param>
/// <param name="zip">The ZipFile instance to be returned to the caller; null when the file does not exist</param>
/// <returns>Stream reader over the selected entry, or null when the file or entry is
/// missing or an exception was logged</returns>
public static StreamReader Unzip(string filename, string zipEntryName, out ZipFile zip)
{
    StreamReader entryReader = null;
    zip = null;

    try
    {
        if (!File.Exists(filename))
        {
            Log.Error($"Data.UnZip(2): File doesn\'t exist: {filename}");
            return entryReader;
        }

        try
        {
            zip = new ZipFile(filename);

            // A null entry name matches the first entry encountered
            var match = zip.FirstOrDefault(candidate =>
                zipEntryName == null
                || string.Equals(candidate.FileName, zipEntryName, StringComparison.OrdinalIgnoreCase));
            if (match == null)
            {
                // Unable to locate zip entry
                return null;
            }

            entryReader = new StreamReader(match.OpenReader());
        }
        catch (Exception err)
        {
            Log.Error(err, "Inner try/catch");
            zip?.Dispose();
            entryReader?.Close();
        }
    }
    catch (Exception err)
    {
        Log.Error(err, "File: " + filename);
    }

    return entryReader;
}
598 
/// <summary>
/// Streams the entries of a zip file as key value pairs of entry name to the entry's lines.
/// Entries are produced lazily by <c>ReadLinesImpl</c> as the result is enumerated.
/// </summary>
/// <remarks>
/// Because <c>ReadLinesImpl</c> is an iterator, the try/catch here only covers creating
/// the enumerable; exceptions raised while enumerating reach the caller.
/// </remarks>
/// <param name="filename">The zip file to stream</param>
/// <returns>The zip contents, or an empty enumerable when the file does not exist</returns>
public static IEnumerable<KeyValuePair<string, List<string>>> Unzip(string filename)
{
    if (File.Exists(filename))
    {
        try
        {
            return ReadLinesImpl(filename);
        }
        catch (Exception err)
        {
            Log.Error(err);
        }
    }
    else
    {
        Log.Error($"Compression.Unzip(): File does not exist: {filename}");
    }

    return Enumerable.Empty<KeyValuePair<string, List<string>>>();
}
628 
/// <summary>
/// Lazily unzips the specified stream, reading one entry per enumeration step.
/// </summary>
/// <param name="stream">The zipped stream to be read</param>
/// <returns>An enumerable of key value pairs, keyed by zip entry name with the entry's
/// lines as the value</returns>
public static IEnumerable<KeyValuePair<string, List<string>>> Unzip(Stream stream)
{
    using var archive = ZipFile.Read(stream);
    foreach (var zipEntry in archive)
    {
        var lines = ReadZipEntry(zipEntry);
        yield return new KeyValuePair<string, List<string>>(zipEntry.FileName, lines);
    }
}
645 
/// <summary>
/// Reads every line of the first zip entry in the specified zip file.
/// </summary>
/// <param name="filename">The zip file path to read</param>
/// <returns>The lines of the first entry, or an empty list when the file does not exist
/// or an exception was logged</returns>
public static List<string> ReadLines(string filename)
{
    if (File.Exists(filename))
    {
        try
        {
            var firstEntry = ReadLinesImpl(filename, firstEntryOnly: true).Single();
            return firstEntry.Value;
        }
        catch (Exception err)
        {
            Log.Error(err);
        }
    }
    else
    {
        Log.Error($"Compression.ReadFirstZipEntry(): File does not exist: {filename}");
    }

    return new List<string>();
}
669 
/// <summary>
/// Iterator that opens the zip file and yields each entry name with the entry's lines,
/// stopping after the first entry when <paramref name="firstEntryOnly"/> is set.
/// </summary>
/// <param name="filename">The zip file path to read</param>
/// <param name="firstEntryOnly">True to yield only the first entry</param>
/// <returns>Key value pairs of entry name to the entry's lines</returns>
private static IEnumerable<KeyValuePair<string, List<string>>> ReadLinesImpl(string filename, bool firstEntryOnly = false)
{
    using var archive = ZipFile.Read(filename);

    var index = 0;
    while (index < archive.Count)
    {
        var zipEntry = archive[index];
        index++;

        yield return new KeyValuePair<string, List<string>>(zipEntry.FileName, ReadZipEntry(zipEntry));
        if (firstEntryOnly)
        {
            yield break;
        }
    }
}
685 
/// <summary>
/// Reads every line of the given zip entry into a list.
/// </summary>
/// <param name="entry">The zip entry to read</param>
/// <returns>The entry's lines in reading order</returns>
private static List<string> ReadZipEntry(Ionic.Zip.ZipEntry entry)
{
    var lines = new List<string>();
    using (var reader = new StreamReader(entry.OpenReader()))
    {
        string current;
        while ((current = reader.ReadLine()) != null)
        {
            lines.Add(current);
        }
    }
    return lines;
}
698 
/// <summary>
/// Reads the first entry of a zipped stream into memory and returns a reader over it.
/// </summary>
/// <param name="zipstream">Stream holding the zip archive; it is consumed by the zip reader</param>
/// <returns>A reader over the first entry's content, or null when the archive has no
/// entries or an exception was logged</returns>
public static StreamReader UnzipStreamToStreamReader(Stream zipstream)
{
    StreamReader reader = null;
    try
    {
        var file = new MemoryStream();

        using (var zipStream = new ZipInputStream(zipstream))
        {
            var entry = zipStream.GetNextEntry();
            if (entry == null)
            {
                Log.Error("Compression.UnzipStreamToStreamReader(): Zip stream contains no entries");
                return null;
            }

            // Drain the entry completely: a single read is not guaranteed to return
            // every byte, and the declared entry size may be unknown.
            zipStream.CopyTo(file);
        }

        // Rewind so the reader starts at the beginning of the entry
        file.Position = 0;
        reader = new StreamReader(file);
    }
    catch (Exception err)
    {
        Log.Error(err);
    }

    return reader;
}
732 
/// <summary>
/// Opens a zip archive from a stream and returns a stream over one of its entries.
/// </summary>
/// <param name="zipstream">Stream holding the zip archive</param>
/// <param name="zipFile">The opened ZipFile instance, returned to the caller</param>
/// <param name="entryName">Name of the entry to open; null or empty selects the first entry</param>
/// <returns>A stream over the selected entry, or null when the entry is missing or an
/// exception was logged</returns>
public static Stream UnzipStream(Stream zipstream, out ZipFile zipFile, string entryName = null)
{
    zipFile = ZipFile.Read(zipstream);

    try
    {
        Ionic.Zip.ZipEntry selected;
        if (!string.IsNullOrEmpty(entryName))
        {
            // Attempt to find our specific entry
            if (!zipFile.ContainsEntry(entryName))
            {
                return null;
            }
            selected = zipFile[entryName];
        }
        else
        {
            // No name given: fall back to the first entry, if any
            selected = zipFile.Entries.FirstOrDefault();
        }

        return selected?.OpenReader();
    }
    catch (Exception err)
    {
        Log.Error(err);
    }

    return null;
}
770 
/// <summary>
/// Unzips an in-memory zip archive into the output folder and returns the created file names.
/// </summary>
/// <param name="zipData">A byte array containing the zip</param>
/// <param name="outputFolder">The target output folder</param>
/// <returns>List of unzipped file names</returns>
public static List<string> UnzipToFolder(byte[] zipData, string outputFolder)
    => UnzipToFolder(new MemoryStream(zipData), outputFolder);
782 
/// <summary>
/// Unzips a local zip file into the directory that contains it and returns the created file names.
/// </summary>
/// <param name="zipFile">Location of the zip on the HD</param>
/// <returns>List of unzipped file names</returns>
public static List<string> UnzipToFolder(string zipFile)
{
    var containingFolder = Path.GetDirectoryName(zipFile);
    return UnzipToFolder(File.OpenRead(zipFile), containingFolder);
}
794 
/// <summary>
/// Unzip the given data stream into the target output folder and return the created file names
/// </summary>
/// <param name="dataStream">The zip data stream; it is closed together with the zip file</param>
/// <param name="outFolder">The target output folder; the current directory is used when null or empty</param>
/// <returns>List of unzipped file names</returns>
/// <exception cref="IOException">An entry would be extracted outside of the output folder</exception>
private static List<string> UnzipToFolder(Stream dataStream, string outFolder)
{
    var files = new List<string>();
    if (string.IsNullOrEmpty(outFolder))
    {
        outFolder = Directory.GetCurrentDirectory();
    }
    ICSharpCode.SharpZipLib.Zip.ZipFile zf = null;

    try
    {
        zf = new ICSharpCode.SharpZipLib.Zip.ZipFile(dataStream);

        // Absolute output root with a trailing separator, used to reject entries whose
        // name (for example "../x") would escape the output folder
        var root = Path.GetFullPath(outFolder);
        var separator = Path.DirectorySeparatorChar.ToString();
        if (!root.EndsWith(separator, StringComparison.Ordinal))
        {
            root += separator;
        }

        // One copy buffer shared by all entries
        var buffer = new byte[4096];

        foreach (ZipEntry zipEntry in zf)
        {
            // Ignore directories
            if (!zipEntry.IsFile)
            {
                continue;
            }

            var fullZipToPath = Path.Combine(outFolder, zipEntry.Name);

            var targetFile = new FileInfo(fullZipToPath);
            if (!targetFile.FullName.StartsWith(root, StringComparison.Ordinal))
            {
                throw new IOException(
                    $"Compression.UnzipToFolder(): Entry '{zipEntry.Name}' would be extracted outside of '{outFolder}'");
            }

            if (targetFile.Directory != null && !targetFile.Directory.Exists)
            {
                targetFile.Directory.Create();
            }

            // Save the file name for later
            files.Add(fullZipToPath);

            // Copy the data in buffer chunks, releasing the entry stream afterwards
            using (var zipStream = zf.GetInputStream(zipEntry))
            using (var streamWriter = File.Create(fullZipToPath))
            {
                StreamUtils.Copy(zipStream, streamWriter, buffer);
            }
        }
    }
    catch
    {
        // lets catch the exception just to log some information about the zip file
        Log.Error($"Compression.UnzipToFolder(): Failure: outFolder: {outFolder} - files: {string.Join(",", files)}");
        throw;
    }
    finally
    {
        if (zf != null)
        {
            zf.IsStreamOwner = true; // Makes close also shut the underlying stream
            zf.Close(); // Ensure we release resources
        }
    }
    return files;
}
858 
/// <summary>
/// Extracts all files from a tar archive into a destination folder.
/// </summary>
/// <param name="source">The source tar file.</param>
/// <param name="destination">The destination folder to extract the files to.</param>
public static void UnTarFiles(string source, string destination)
{
    // using blocks release the file and archive even when extraction throws
    using (var inStream = File.OpenRead(source))
    using (var tarArchive = TarArchive.CreateInputTarArchive(inStream))
    {
        tarArchive.ExtractContents(destination);
    }
}
872 
/// <summary>
/// Extract tar.gz files to disk
/// </summary>
/// <param name="source">Tar.gz source file</param>
/// <param name="destination">Location folder to unzip to</param>
public static void UnTarGzFiles(string source, string destination)
{
    // using blocks release the file, gzip stream and archive even when extraction throws
    using (var inStream = File.OpenRead(source))
    using (var gzipStream = new GZipInputStream(inStream))
    using (var tarArchive = TarArchive.CreateInputTarArchive(gzipStream))
    {
        tarArchive.ExtractContents(destination);
    }
}
888 
/// <summary>
/// Lazily enumerates the file entries of a tar stream as name and content pairs,
/// skipping directory entries.
/// </summary>
/// <param name="stream">The input tar stream</param>
/// <param name="isTarGz">True if the input stream is a .tar.gz or .tgz, in which case it is read through a gzip stream</param>
/// <returns>An enumerable containing each tar entry name and its contents</returns>
public static IEnumerable<KeyValuePair<string, byte[]>> UnTar(Stream stream, bool isTarGz)
{
    using var tar = new TarInputStream(isTarGz ? (Stream)new GZipInputStream(stream) : stream);

    for (var entry = tar.GetNextEntry(); entry != null; entry = tar.GetNextEntry())
    {
        if (entry.IsDirectory)
        {
            continue;
        }

        using var contents = new MemoryStream();
        tar.CopyEntryContents(contents);
        yield return new KeyValuePair<string, byte[]>(entry.Name, contents.ToArray());
    }
}
912 
/// <summary>
/// Enumerate through the files of a TAR and get a list of KVP names-byte arrays.
/// The file is read through a gzip stream when its name ends in "tar.gz" or ".tgz"
/// (compared ignoring case).
/// </summary>
/// <param name="source">Path of the tar, tar.gz or tgz file</param>
/// <returns>An enumerable containing each tar entry name and its contents; directory
/// entries are skipped</returns>
public static IEnumerable<KeyValuePair<string, byte[]>> UnTar(string source)
{
    var isTarGz = source.EndsWith("tar.gz", StringComparison.OrdinalIgnoreCase)
        || source.EndsWith(".tgz", StringComparison.OrdinalIgnoreCase);

    // Delegating to the stream overload creates exactly one tar reader and disposes it
    // even when the caller stops enumerating early.
    using (var file = File.OpenRead(source))
    {
        foreach (var entry in UnTar(file, isTarGz))
        {
            yield return entry;
        }
    }
}
947 
/// <summary>
/// Opens the zip at the given path and runs the library's archive test on it.
/// </summary>
/// <param name="path">Path to the zip file</param>
/// <returns>true if archive tests ok; false otherwise.</returns>
public static bool ValidateZip(string path)
{
    using var archive = new ICSharpCode.SharpZipLib.Zip.ZipFile(path);
    var testsOk = archive.TestArchive(true);
    return testsOk;
}
960 
/// <summary>
/// Reads the zip file at the given path and returns the names of its entries.
/// </summary>
/// <param name="zipFileName">The zip file name</param>
/// <returns>An IEnumerable of entry file names</returns>
public static IEnumerable<string> GetZipEntryFileNames(string zipFileName)
{
    using var archive = ZipFile.Read(zipFileName);
    return archive.EntryFileNames;
}
973 
/// <summary>
/// Reads a zip archive from the given stream and returns the names of its entries.
/// </summary>
/// <param name="zipFileStream">Stream to the file</param>
/// <returns>IEnumerable of entry file names</returns>
public static IEnumerable<string> GetZipEntryFileNames(Stream zipFileStream)
{
    using var archive = ZipFile.Read(zipFileStream);
    return archive.EntryFileNames;
}
986 
/// <summary>
/// Extracts a 7-zip archive to disk, using the 7-zip CLI utility
/// </summary>
/// <param name="inputFile">Path to the 7z file</param>
/// <param name="outputDirectory">Directory to output contents of 7z</param>
/// <param name="execTimeout">Timeout in milliseconds for how long we should wait for the extraction to complete</param>
/// <exception cref="TimeoutException">The extraction did not finish within the timeout</exception>
/// <exception cref="Exception">The exit code was not 0</exception>
public static void Extract7ZipArchive(string inputFile, string outputDirectory, int execTimeout = 60000)
{
    // Wraps a path in quotes when it contains whitespace and is not already quoted,
    // so it is passed to 7z as a single argument
    string Quote(string value)
    {
        if (string.IsNullOrEmpty(value)
            || value.StartsWith("\"", StringComparison.Ordinal)
            || !value.Any(char.IsWhiteSpace))
        {
            return value;
        }

        // A trailing backslash would otherwise escape the closing quote
        var trailing = value.EndsWith("\\", StringComparison.Ordinal) ? "\\" : string.Empty;
        return "\"" + value + trailing + "\"";
    }

    var zipper = IsLinux ? "7z" : "C:/Program Files/7-Zip/7z.exe";
    var psi = new ProcessStartInfo(zipper, " e " + Quote(inputFile) + " -o" + Quote(outputDirectory))
    {
        CreateNoWindow = true,
        WindowStyle = ProcessWindowStyle.Hidden,
        UseShellExecute = false,
        RedirectStandardOutput = false
    };

    using (var process = new Process())
    {
        process.StartInfo = psi;
        process.Start();

        if (!process.WaitForExit(execTimeout))
        {
            // Do not leave the extractor running once we give up on it
            try
            {
                process.Kill();
            }
            catch (InvalidOperationException)
            {
                // The process exited between the timeout and the kill request
            }
            throw new TimeoutException($"Timed out extracting 7Zip archive: {inputFile} ({execTimeout} milliseconds)");
        }
        if (process.ExitCode != 0)
        {
            throw new Exception($"Compression.Extract7ZipArchive(): 7Zip exited unsuccessfully (code {process.ExitCode})");
        }
    }
}
1018  }
1019 }