Lean  $LEAN_TAG$
LeanDataReader.cs
1 /*
2  * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
3  * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14 */
15 
16 using System;
17 using System.Collections.Generic;
18 using System.IO;
19 using System.Linq;
20 using Ionic.Zip;
21 using NodaTime;
22 using QuantConnect.Data;
23 using QuantConnect.Logging;
25 using QuantConnect.Util;
26 
namespace QuantConnect.ToolBox
{
    /// <summary>
    /// This class reads data directly from a zipped data file on disk and returns it
    /// without the data entering the Lean data enumeration stack.
    /// </summary>
    public class LeanDataReader
    {
        private readonly DateTime _date;
        private readonly string _zipPath;
        private readonly string _zipentry;
        private readonly SubscriptionDataConfig _config;

        /// <summary>
        /// Creates a reader whose zip file path and zip entry name are generated from the
        /// given symbol, date, resolution and the tick type of <paramref name="config"/>.
        /// </summary>
        /// <param name="config">The <see cref="SubscriptionDataConfig"/></param>
        /// <param name="symbol">The <see cref="Symbol"/> that will be read</param>
        /// <param name="resolution">The <see cref="Resolution"/> that will be read</param>
        /// <param name="date">The <see cref="DateTime"/> that will be read</param>
        /// <param name="dataFolder">The root data folder</param>
        public LeanDataReader(SubscriptionDataConfig config, Symbol symbol, Resolution resolution, DateTime date, string dataFolder)
        {
            _date = date;
            _zipPath = LeanData.GenerateZipFilePath(dataFolder, symbol, date, resolution, config.TickType);
            _zipentry = LeanData.GenerateZipEntryName(symbol, date, resolution, config.TickType);
            _config = config;
        }

        /// <summary>
        /// Initializes an instance of LeanDataReader from a path to a zipped data file.
        /// It also supports declaring the zip entry CSV file for options and futures.
        /// </summary>
        /// <param name="filepath">Absolute or relative path to a zipped data file, optionally the zip entry file can be declared by using '#' as separator.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filepath"/> is null</exception>
        /// <exception cref="ArgumentException">The path cannot be parsed by <c>LeanData.TryParsePath</c></exception>
        /// <example>
        /// var dataReader = LeanDataReader("../relative/path/to/file.zip")
        /// var dataReader = LeanDataReader("absolute/path/to/file.zip#zipEntry.csv")
        /// </example>
        public LeanDataReader(string filepath)
        {
            if (filepath == null)
            {
                throw new ArgumentNullException(nameof(filepath));
            }

            Symbol symbol;
            DateTime date;
            Resolution resolution;
            string zipEntry = null;

            // a '#' in the path means the caller appended a zip entry name after the zip file path
            var isFutureOrOption = filepath.Contains('#', StringComparison.InvariantCulture);
            if (isFutureOrOption)
            {
                // split once; the first part is the zip file path, the second the entry name
                var parts = filepath.Split('#');
                filepath = parts[0];
                zipEntry = parts[1];
            }

            var fileInfo = new FileInfo(filepath);
            if (!LeanData.TryParsePath(fileInfo.FullName, out symbol, out date, out resolution, out var tickType, out var dataType))
            {
                throw new ArgumentException($"File {filepath} cannot be parsed.");
            }

            if (isFutureOrOption)
            {
                // replace the symbol parsed from the path with the one read from the zip entry name
                symbol = LeanData.ReadSymbolFromZipEntry(symbol, resolution, zipEntry);
            }

            var marketHoursDataBase = MarketHoursDatabase.FromDataFolder();
            var dataTimeZone = marketHoursDataBase.GetDataTimeZone(symbol.ID.Market, symbol, symbol.SecurityType);
            var exchangeTimeZone = marketHoursDataBase.GetExchangeHours(symbol.ID.Market, symbol, symbol.SecurityType).TimeZone;

            var config = new SubscriptionDataConfig(dataType, symbol, resolution,
                dataTimeZone, exchangeTimeZone, tickType: tickType,
                fillForward: false, extendedHours: true, isInternalFeed: true);

            _date = date;
            _zipPath = fileInfo.FullName;
            _zipentry = zipEntry;
            _config = config;
        }

        /// <summary>
        /// Lazily enumerates the zip file and returns the data points produced by the
        /// reader of the configured data type. If the zip file does not exist an error
        /// is logged and nothing is returned.
        /// </summary>
        /// <remarks>
        /// Three read paths are used:
        /// data types implementing a stream reader are read entry by entry from a stream;
        /// futures and options without an explicit zip entry have every entry read, with
        /// the symbol taken from each entry name; everything else is read line by line
        /// from a single zip entry.
        /// </remarks>
        /// <returns>IEnumerable of <see cref="BaseData"/></returns>
        public IEnumerable<BaseData> Parse()
        {
            if (!File.Exists(_zipPath))
            {
                Log.Error($"LeanDataReader.Parse(): File does not exist: {_zipPath}");
                yield break;
            }

            var factory = (BaseData) ObjectActivator.GetActivator(_config.Type).Invoke(new object[0]);

            // when no zip entry was provided for a future/option the symbol has to be
            // resolved from each zip entry name; the fields involved never change, so compute it once
            var resolveSymbolFromEntry = _zipentry == null
                && (_config.SecurityType == SecurityType.Future || _config.SecurityType.IsOption());

            if (_config.Type.ImplementsStreamReader())
            {
                using (var zip = new ZipFile(_zipPath))
                {
                    foreach (var zipEntry in zip.Where(x => _zipentry == null || string.Equals(x.FileName, _zipentry, StringComparison.OrdinalIgnoreCase)))
                    {
                        // we get the contract symbol from the zip entry if not already provided with the zip entry
                        var symbol = resolveSymbolFromEntry
                            ? LeanData.ReadSymbolFromZipEntry(_config.Symbol, _config.Resolution, zipEntry.FileName)
                            : _config.Symbol;

                        using (var entryReader = new StreamReader(zipEntry.OpenReader()))
                        {
                            while (!entryReader.EndOfStream)
                            {
                                var dataPoint = factory.Reader(_config, entryReader, _date, false);
                                dataPoint.Symbol = symbol;
                                yield return dataPoint;
                            }
                        }
                    }
                }
            }
            // for futures and options if no entry was provided we just read all
            else if (resolveSymbolFromEntry)
            {
                foreach (var entries in Compression.Unzip(_zipPath))
                {
                    // we get the contract symbol from the zip entry
                    var symbol = LeanData.ReadSymbolFromZipEntry(_config.Symbol, _config.Resolution, entries.Key);
                    foreach (var line in entries.Value)
                    {
                        var dataPoint = factory.Reader(_config, line, _date, false);
                        dataPoint.Symbol = symbol;
                        yield return dataPoint;
                    }
                }
            }
            else
            {
                ZipFile zipFile = null;
                try
                {
                    using (var unzipped = Compression.Unzip(_zipPath, _zipentry, out zipFile))
                    {
                        if (unzipped == null)
                        {
                            yield break;
                        }

                        string line;
                        while ((line = unzipped.ReadLine()) != null)
                        {
                            yield return factory.Reader(_config, line, _date, false);
                        }
                    }
                }
                finally
                {
                    // the finally block also runs when the enumeration ends early (yield break,
                    // the consumer disposing the enumerator, or an exception), so the zip file
                    // handed back through the out parameter is always released
                    zipFile?.Dispose();
                }
            }
        }

        /// <summary>
        /// Returns the data time zone
        /// </summary>
        /// <returns><see cref="NodaTime.DateTimeZone"/> representing the data timezone</returns>
        public DateTimeZone GetDataTimeZone() => _config.DataTimeZone;

        /// <summary>
        /// Returns the Exchange time zone
        /// </summary>
        /// <returns><see cref="NodaTime.DateTimeZone"/> representing the exchange timezone</returns>
        public DateTimeZone GetExchangeTimeZone() => _config.ExchangeTimeZone;
    }
}